From 5d0525b1e9bce6eec7bccc83ab645d77e0c41118 Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 10 Jun 2024 11:49:40 +0200 Subject: [PATCH] added imgur + some cleanup --- common.go | 24 +++++++ go.mod | 1 + go.sum | 2 + images-imgur.go | 143 +++++++++++++++++++++++++++++++++++++++ images-quant.go | 95 ++++++++++++++++++++++++++ images.go | 174 ++++++++++++++++-------------------------------- run.sh | 2 +- text.go | 10 --- 8 files changed, 324 insertions(+), 127 deletions(-) create mode 100644 common.go create mode 100644 images-imgur.go create mode 100644 images-quant.go diff --git a/common.go b/common.go new file mode 100644 index 0000000..d1de775 --- /dev/null +++ b/common.go @@ -0,0 +1,24 @@ +package main + +import ( + "html/template" +) + +var ( + debugMode bool = true + funcs = template.FuncMap{ + "sub": func(a, b int) int { + return a - b + }, + "add": func(a, b int) int { + return a + b + }, + } +) + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/go.mod b/go.mod index 0cca960..63d4e99 100644 --- a/go.mod +++ b/go.mod @@ -16,4 +16,5 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect golang.org/x/net v0.21.0 // indirect golang.org/x/sys v0.17.0 // indirect + golang.org/x/time v0.5.0 // indirect ) diff --git a/go.sum b/go.sum index f919d3c..77a830d 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= diff --git a/images-imgur.go b/images-imgur.go new file mode 100644 index 0000000..2e76879 --- /dev/null +++ b/images-imgur.go @@ -0,0 +1,143 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +// PerformImgurImageSearch performs an image search on Imgur and returns the results +func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { + var results []ImageSearchResult + searchURL := buildImgurSearchURL(query, page) + + resp, err := http.Get(searchURL) + if err != nil { + return nil, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("loading HTML document: %v", err) + } + + doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) { + thumbnailSrc, exists := s.Find("a img").Attr("src") + if !exists || len(thumbnailSrc) < 25 { + return + } + imgSrc := strings.Replace(thumbnailSrc, "b.", ".", 1) + + // Ensure the URLs have the correct protocol + if !strings.HasPrefix(thumbnailSrc, "http") { + thumbnailSrc = "https:" + thumbnailSrc + } + if !strings.HasPrefix(imgSrc, "http") { + imgSrc = "https:" + imgSrc + } + + urlPath, exists := s.Find("a").Attr("href") + if !exists { + return + } + + // Scrape the image directly from the Imgur page + imgSrc = scrapeImageFromImgurPage("https://imgur.com" + urlPath) + + // Remove any query parameters from the URL + imgSrc = removeQueryParameters(imgSrc) + + title, _ := s.Find("a img").Attr("alt") + + width, _ := strconv.Atoi(s.Find("a img").AttrOr("width", "0")) + height, _ := strconv.Atoi(s.Find("a img").AttrOr("height", "0")) + + results = append(results, ImageSearchResult{ + Thumbnail: thumbnailSrc, + Title: strings.TrimSpace(title), + Media: imgSrc, + Width: width, + Height: height, + Source: "https://imgur.com" + urlPath, + ThumbProxy: imgSrc, //"/img_proxy?url=" + url.QueryEscape(imgSrc) + }) + }) + + return results, nil +} + +// scrapeImageFromImgurPage scrapes the image source from the Imgur page +func scrapeImageFromImgurPage(pageURL string) string { + resp, err := http.Get(pageURL) + if err != nil { + fmt.Printf("Error fetching page: %v\n", err) + return "" + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Printf("Unexpected status code: %d\n", resp.StatusCode) + return "" + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + fmt.Printf("Error loading HTML document: %v\n", err) + return "" + } + + imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content") + if !exists { + fmt.Printf("Image not found on page: %s\n", pageURL) + return "" + } + + // Ensure the URL has the correct protocol + if !strings.HasPrefix(imgSrc, "http") { + imgSrc = "https:" + imgSrc + } + + return imgSrc +} + +// removeQueryParameters removes query parameters from a URL +func removeQueryParameters(rawURL string) string { + parsedURL, err := url.Parse(rawURL) + if err != nil { + fmt.Printf("Error parsing URL: %v\n", err) + return rawURL + } + parsedURL.RawQuery = "" + return parsedURL.String() +} + +func buildImgurSearchURL(query string, page int) string { + baseURL := "https://imgur.com/search/score/all" + params := url.Values{} + params.Add("q", query) + params.Add("qs", "thumbs") + params.Add("p", fmt.Sprintf("%d", page-1)) + return fmt.Sprintf("%s?%s", baseURL, params.Encode()) +} + +// func main() { +// results, err := PerformImgurImageSearch("cats", "true", "en", 1) +// if err != nil { +// fmt.Println("Error:", err) +// return +// } + +// for _, result := range results { +// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n", +// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height) +// } +// } diff --git a/images-quant.go b/images-quant.go new file mode 100644 index 0000000..fa799f8 --- /dev/null +++ b/images-quant.go @@ -0,0 +1,95 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "time" +) + +// QwantAPIResponse represents the JSON response structure from Qwant API +type QwantAPIResponse struct { + Data struct { + Result struct { + Items []struct { + Media string `json:"media"` + Thumbnail string `json:"thumbnail"` + Title string `json:"title"` + Url string `json:"url"` + Width int `json:"width"` + Height int `json:"height"` + } `json:"items"` + } `json:"result"` + } `json:"data"` +} + +// PerformQwantImageSearch performs an image search on Qwant and returns the results. +func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { + const resultsPerPage = 50 + var offset int + if page <= 1 { + offset = 0 + } else { + offset = (page - 1) * resultsPerPage + } + + if safe == "" { + safe = "0" + } + + if lang == "" { + lang = "en_CA" + } + + apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s", + url.QueryEscape(query), + resultsPerPage, + lang, + offset, + safe) + + client := &http.Client{Timeout: 10 * time.Second} + + req, err := http.NewRequest("GET", apiURL, nil) + if err != nil { + return nil, fmt.Errorf("creating request: %v", err) + } + + ImageUserAgent, err := GetUserAgent("Image-Search") + if err != nil { + return nil, err + } + + req.Header.Set("User-Agent", ImageUserAgent) + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + var apiResp QwantAPIResponse + if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { + return nil, fmt.Errorf("decoding response: %v", err) + } + + var results []ImageSearchResult + for _, item := range apiResp.Data.Result.Items { + results = append(results, ImageSearchResult{ + Thumbnail: item.Thumbnail, + Title: item.Title, + Media: item.Media, + Source: item.Url, + ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media), + Width: item.Width, + Height: item.Height, + }) + } + + return results, nil +} diff --git a/images.go b/images.go index 16e8581..f235a85 100644 --- a/images.go +++ b/images.go @@ -1,120 +1,35 @@ package main import ( - "encoding/json" "fmt" "html/template" "log" + "math/rand" "net/http" - "net/url" + "sync" "time" ) -// QwantAPIResponse represents the JSON response structure from Qwant API -type QwantAPIResponse struct { - Data struct { - Result struct { - Items []struct { - Media string `json:"media"` - Thumbnail string `json:"thumbnail"` - Title string `json:"title"` - Url string `json:"url"` - Width int `json:"width"` - Height int `json:"height"` - } `json:"items"` - } `json:"result"` - } `json:"data"` +var ( + imageEngines []imageEngine + imageEngineLock sync.Mutex +) + +type imageEngine struct { + Name string + Func func(string, string, string, int) ([]ImageSearchResult, error) + Weight int } -var funcs = template.FuncMap{ - "sub": func(a, b int) int { - return a - b - }, - "add": func(a, b int) int { - return a + b - }, +func init() { + imageEngines = []imageEngine{ + {Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1}, + {Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2}, + } + + rand.Seed(time.Now().UnixNano()) } -// FetchImageResults contacts the image search API and returns a slice of ImageSearchResult -func fetchImageResults(query string, safe, lang string, page int) ([]ImageSearchResult, error) { - const resultsPerPage = 50 - var offset int - if page <= 1 { - offset = 0 - } else { - offset = (page - 1) * resultsPerPage - } - - // Ensuring safe search is disabled by default if not specified - if safe == "" { - safe = "0" - } - - // Defaulting to English Canada locale if not specified - if lang == "" { - lang = "en_CA" - } - - // Format &lang=lang_de is incorrect, implement fix ! - apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s", - url.QueryEscape(query), - resultsPerPage, - lang, - offset, - safe) - - client := &http.Client{Timeout: 10 * time.Second} - - req, err := http.NewRequest("GET", apiURL, nil) - if err != nil { - return nil, fmt.Errorf("creating request: %v", err) - } - - // User Agent generation - ImageUserAgent, err := GetUserAgent("Image-Search") - if err != nil { - fmt.Println("Error:", err) - return nil, err - } - - if debugMode { - fmt.Println("Generated User Agent (images):", ImageUserAgent) - } - - req.Header.Set("User-Agent", ImageUserAgent) - - resp, err := client.Do(req) - if err != nil { - return nil, fmt.Errorf("making request: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) - } - - var apiResp QwantAPIResponse - if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { - return nil, fmt.Errorf("decoding response: %v", err) - } - - var results []ImageSearchResult - for _, item := range apiResp.Data.Result.Items { - results = append(results, ImageSearchResult{ - Thumbnail: item.Thumbnail, // Thumbnail URL - Title: item.Title, // Image title - Media: item.Media, // Direct link to the image - Source: item.Url, - ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media), - Width: item.Width, - Height: item.Height, - }) - } - - return results, nil -} - -// HandleImageSearch is the HTTP handler for image search requests func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) { startTime := time.Now() @@ -174,31 +89,58 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string select { case results := <-cacheChan: if results == nil { - combinedResults = fetchAndCacheImageResults(query, safe, lang, page) + combinedResults = fetchImageResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } else { _, _, imageResults := convertToSpecificResults(results) combinedResults = imageResults } case <-time.After(2 * time.Second): log.Println("Cache check timeout") - combinedResults = fetchAndCacheImageResults(query, safe, lang, page) + combinedResults = fetchImageResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } return combinedResults } -func fetchAndCacheImageResults(query, safe, lang string, page int) []ImageSearchResult { - results, err := fetchImageResults(query, safe, lang, page) - if err != nil || len(results) == 0 { - log.Printf("Error fetching image results: %v", err) - return []ImageSearchResult{ - {Title: "Results are currently unavailable, sorry. Please try again later."}, - } - } +func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { + engine := selectImageEngine() + log.Printf("Using image search engine: %s", engine.Name) - // Cache the valid results - cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "image"} - resultsCache.Set(cacheKey, convertToSearchResults(results)) + results, err := engine.Func(query, safe, lang, page) + if err != nil { + log.Printf("Error performing image search with %s: %v", engine.Name, err) + return nil + } return results } + +func selectImageEngine() imageEngine { + imageEngineLock.Lock() + defer imageEngineLock.Unlock() + + totalWeight := 0 + for _, engine := range imageEngines { + totalWeight += engine.Weight + } + + randValue := rand.Intn(totalWeight) + for _, engine := range imageEngines { + if randValue < engine.Weight { + // Adjust weights for load balancing + for i := range imageEngines { + if imageEngines[i].Name == engine.Name { + imageEngines[i].Weight = max(1, imageEngines[i].Weight-1) + } else { + imageEngines[i].Weight++ + } + } + return engine + } + randValue -= engine.Weight + } + + return imageEngines[0] // fallback to the first engine +} diff --git a/run.sh b/run.sh index aa4f4f3..9fa41ee 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ #!/bin/bash -go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file +go run main.go common.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file diff --git a/text.go b/text.go index ecf5d73..4cc1e0b 100644 --- a/text.go +++ b/text.go @@ -11,7 +11,6 @@ import ( ) var ( - debugMode bool searchEngines []searchEngine searchEngineLock sync.Mutex ) @@ -23,8 +22,6 @@ type searchEngine struct { } func init() { - debugMode = false - searchEngines = []searchEngine{ {Name: "Google", Func: PerformGoogleTextSearch, Weight: 1}, {Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2}, @@ -138,13 +135,6 @@ func selectSearchEngine() searchEngine { return searchEngines[0] // fallback to the first engine } -func max(a, b int) int { - if a > b { - return a - } - return b -} - func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) { log.Printf("Displaying results for page %d", page) log.Printf("Total results: %d", len(results))