package main

import (
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
)

// deviantArtRequestTimeout bounds every outbound DeviantArt request (the
// search page fetch and each image HEAD probe) so a stalled connection cannot
// block a search indefinitely.
const deviantArtRequestTimeout = 10 * time.Second

// deviantArtHTTPClient is shared by all DeviantArt requests so connections
// are reused instead of building a new client for every request.
var deviantArtHTTPClient = &http.Client{Timeout: deviantArtRequestTimeout}

// NextPageCache is a specialized cache for storing next page links.
//
// Entries older than expiration are treated as missing and are evicted lazily
// on lookup. An expiration of zero or less disables expiry.
type NextPageCache struct {
	mu         sync.Mutex
	links      map[string]string
	storedAt   map[string]time.Time // insertion time per key, used for expiry
	expiration time.Duration
}

// NewNextPageCache creates a new NextPageCache with a specified expiration
// duration.
func NewNextPageCache(expiration time.Duration) *NextPageCache {
	return &NextPageCache{
		links:      make(map[string]string),
		storedAt:   make(map[string]time.Time),
		expiration: expiration,
	}
}

// Get retrieves the next page link for a given key from the cache.
// It returns ("", false) when the key is absent or its entry has expired.
func (npc *NextPageCache) Get(key CacheKey) (string, bool) {
	npc.mu.Lock()
	defer npc.mu.Unlock()

	k := npc.keyToString(key)
	link, exists := npc.links[k]
	if !exists {
		return "", false
	}

	// Entries without a recorded timestamp are kept; only entries known to be
	// too old are dropped.
	if npc.expiration > 0 {
		if at, ok := npc.storedAt[k]; ok && time.Since(at) > npc.expiration {
			delete(npc.links, k)
			delete(npc.storedAt, k)
			return "", false
		}
	}
	return link, true
}

// Set stores the next page link for a given key in the cache and records the
// insertion time used for expiry.
//
// TODO: consider reusing the generic cache in "cache.go" for this.
func (npc *NextPageCache) Set(key CacheKey, link string) {
	npc.mu.Lock()
	defer npc.mu.Unlock()

	// Tolerate caches that were not built through NewNextPageCache.
	if npc.links == nil {
		npc.links = make(map[string]string)
	}
	if npc.storedAt == nil {
		npc.storedAt = make(map[string]time.Time)
	}

	k := npc.keyToString(key)
	npc.links[k] = link
	npc.storedAt[k] = time.Now()
}

// keyToString converts a CacheKey to a string representation of the form
// "query|page|safe|lang|type".
func (npc *NextPageCache) keyToString(key CacheKey) string {
	return fmt.Sprintf("%s|%d|%t|%s|%s", key.Query, key.Page, key.Safe, key.Lang, key.Type)
}

var (
	nextPageCache = NewNextPageCache(6 * time.Hour) // Cache with 6-hour expiration
)

// PerformDeviantArtImageSearch performs a search on DeviantArt and returns a
// list of image results together with the time the search took.
//
// Page 1 (or lower) is fetched from the regular search URL. Later pages are
// fetched through the "next page" link that was scraped while serving the
// previous page, so page N can only be served after page N-1 has been
// requested with the same query, safe and lang values; otherwise an error is
// returned. Results keep the order in which they appear in the document.
// An error is also returned when no usable image is found.
func PerformDeviantArtImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
	startTime := time.Now()

	// Anything below 1 is served as the first page, so key it as page 1 to
	// keep the next-page bookkeeping consistent.
	if page < 1 {
		page = 1
	}

	cacheKey := CacheKey{
		Query: query,
		Page:  page,
		Safe:  safe == "active",
		Lang:  lang,
		Type:  "deviantart",
	}

	// Pages after the first are only reachable through a cached link.
	searchURL := buildDeviantArtSearchURL(query, page)
	if page > 1 {
		nextPageLink, found := nextPageCache.Get(cacheKey)
		if !found {
			return nil, 0, fmt.Errorf("next page link not found in cache")
		}
		searchURL = nextPageLink
	}

	// Get the User-Agent string
	userAgent, err := GetUserAgent("Image-Search-DeviantArt")
	if err != nil {
		return nil, 0, err
	}

	// Make the HTTP request with User-Agent header
	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("creating request: %w", err)
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := deviantArtHTTPClient.Do(req)
	if err != nil {
		return nil, 0, fmt.Errorf("making request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Parse the HTML document
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, 0, fmt.Errorf("loading HTML document: %w", err)
	}

	// Collect candidates in document order; they are verified concurrently
	// below and filtered afterwards so the original ordering is preserved.
	var candidates []ImageSearchResult
	doc.Find("div._2pZkk div div a").Each(func(_ int, s *goquery.Selection) {
		// Skip images that are blurred (premium content). The text lives in
		// parent > div > div > div relative to the anchor; goquery only
		// understands CSS selectors, so walk the tree explicitly.
		premiumText := s.Parent().
			ChildrenFiltered("div").
			ChildrenFiltered("div").
			ChildrenFiltered("div").
			Text()
		if strings.Contains(premiumText, "Watch the artist to view this deviation") {
			return
		}

		// Extract image source, fallback on data-src if necessary
		img := s.Find("div img")
		imgSrc := img.AttrOr("srcset", "")
		if imgSrc == "" {
			imgSrc = img.AttrOr("data-src", "")
		}

		// srcset is "url descriptor, ..."; keep only the first URL.
		fields := strings.Fields(imgSrc)
		if len(fields) == 0 {
			return
		}
		imgSrc = fields[0]

		// Drop everything from "/v1" onwards in the path.
		if parsedURL, err := url.Parse(imgSrc); err == nil {
			parsedURL.Path = strings.Split(parsedURL.Path, "/v1")[0]
			imgSrc = parsedURL.String()
		}

		// Extract URL and title
		resultURL := s.AttrOr("href", "")
		title := strings.TrimSpace(s.AttrOr("aria-label", ""))

		// Only proceed if title, URL, and img_src are not empty
		if title == "" || resultURL == "" || imgSrc == "" {
			return
		}

		candidates = append(candidates, ImageSearchResult{
			Title:      title,
			Media:      imgSrc,
			Width:      0,
			Height:     0,
			Source:     resultURL,
			ThumbProxy: "/imgproxy?url=" + imgSrc,
		})
	})

	// Verify that each image URL is accessible. Every goroutine writes only
	// to its own slot, so no extra locking is needed.
	accessible := make([]bool, len(candidates))
	var wg sync.WaitGroup
	for i := range candidates {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			accessible[i] = isValidImageURL(candidates[i].Media, userAgent, candidates[i].Source)
		}(i)
	}
	wg.Wait()

	results := make([]ImageSearchResult, 0, len(candidates))
	for i := range candidates {
		if accessible[i] {
			results = append(results, candidates[i])
		}
	}

	// Cache the next page link, if any. It is stored under the key of the
	// FOLLOWING page, because that is the key a request for that page will
	// look up.
	nextPageLink := doc.Find("a._1OGeq").Last().AttrOr("href", "")
	if nextPageLink != "" {
		nextKey := cacheKey
		nextKey.Page = page + 1
		nextPageCache.Set(nextKey, nextPageLink)
	}

	duration := time.Since(startTime)

	// Report an error when no results survived filtering
	if len(results) == 0 {
		return nil, duration, fmt.Errorf("no images found")
	}

	return results, duration, nil
}

// buildDeviantArtSearchURL builds the search URL for DeviantArt.
//
// The page argument is currently not encoded into the URL; pages after the
// first are reached through cached "next page" links instead.
func buildDeviantArtSearchURL(query string, page int) string {
	baseURL := "https://www.deviantart.com/search"
	params := url.Values{}
	params.Add("q", query)
	return baseURL + "?" + params.Encode()
}

// isValidImageURL checks if the image URL is accessible with the provided
// User-Agent. It issues a HEAD request and reports true only for a 200 OK
// response; any request error counts as not accessible.
func isValidImageURL(imgSrc, userAgent, referer string) bool {
	req, err := http.NewRequest("HEAD", imgSrc, nil)
	if err != nil {
		return false
	}

	// Set headers to mimic a regular browser request
	req.Header.Set("User-Agent", userAgent)
	req.Header.Set("Referer", referer)

	resp, err := deviantArtHTTPClient.Do(req)
	if err != nil {
		return false
	}
	defer resp.Body.Close()

	return resp.StatusCode == http.StatusOK
}

// // Example usage:
// func main() {
// 	results, duration, err := PerformDeviantArtImageSearch("kittens", "false", "en", 1)
// 	if err != nil {
// 		fmt.Println("Error:", err)
// 		return
// 	}
// 	fmt.Printf("Search took: %v\n", duration)
// 	fmt.Printf("Total results: %d\n", len(results))
// 	for _, result := range results {
// 		fmt.Printf("Title: %s\nThumbnail: %s\nMedia: %s\nSource (Original Image URL): %s\n\n",
// 			result.Title, result.Thumbnail, result.Media, result.Source)
// 	}
// }