From 5a8da744395dfdf652db5f0a00540ea82154a708 Mon Sep 17 00:00:00 2001 From: partisan Date: Mon, 12 Aug 2024 12:56:42 +0200 Subject: [PATCH] added image fetching using Bing --- README.md | 2 +- images-bing.go | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ images.go | 3 +- 3 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 images-bing.go diff --git a/README.md b/README.md index 0f55f01..f6b7546 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ A self-hosted private and anonymous [metasearch engine](https://en.wikipedia.org ## Features - Text search using Google, Brave, DuckDuckGo and LibreX/Y. -- Image search using the Qwant/Imgur. +- Image search using Qwant, Bing and Imgur. - Video search using Piped API. - Image viewing using proxy and direct links to image source pages for image searches. - Maps using OpenStreetMap diff --git a/images-bing.go b/images-bing.go new file mode 100644 index 0000000..a9f8717 --- /dev/null +++ b/images-bing.go @@ -0,0 +1,107 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" +) + +func PerformBingImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) { + startTime := time.Now() + + // Build the search URL + searchURL := buildBingSearchURL(query, page) + + // Make the HTTP request + resp, err := http.Get(searchURL) + if err != nil { + return nil, 0, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + // Parse the HTML document + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, 0, fmt.Errorf("loading HTML document: %v", err) + } + + // Extract data using goquery + var results []ImageSearchResult + doc.Find(".imgpt").Each(func(i int, s *goquery.Selection) { + imgTag := s.Find("img") + imgSrc, exists := 
imgTag.Attr("src") + if !exists { + return + } + + title, _ := imgTag.Attr("alt") + + // Extract width and height if available + width, _ := strconv.Atoi(imgTag.AttrOr("width", "0")) + height, _ := strconv.Atoi(imgTag.AttrOr("height", "0")) + + // Extract the original image URL from the `mediaurl` parameter in the link + pageLink, exists := s.Find("a.iusc").Attr("href") + mediaURL := "" + if exists { + if u, err := url.Parse(pageLink); err == nil { + if mediaURLParam := u.Query().Get("mediaurl"); mediaURLParam != "" { + mediaURL, _ = url.QueryUnescape(mediaURLParam) + } + } + } + + results = append(results, ImageSearchResult{ + Thumbnail: imgSrc, + Title: strings.TrimSpace(title), + Media: imgSrc, + Width: width, + Height: height, + Source: mediaURL, // Original image URL + ThumbProxy: imgSrc, + }) + }) + + duration := time.Since(startTime) + + // Check if the number of results is one or less + if len(results) <= 1 { + return nil, duration, fmt.Errorf("no images found") + } + + return results, duration, nil +} + +func buildBingSearchURL(query string, page int) string { + baseURL := "https://www.bing.com/images/search" + params := url.Values{} + params.Add("q", query) + params.Add("first", fmt.Sprintf("%d", (page-1)*35+1)) // Pagination, but increasing it doesn't seem to make a difference + params.Add("count", "35") + params.Add("form", "HDRSC2") + return baseURL + "?" 
+ params.Encode() +} + +// func main() { +// results, duration, err := PerformBingImageSearch("kittens", "false", "en", 1) +// if err != nil { +// fmt.Println("Error:", err) +// return +// } + +// fmt.Printf("Search took: %v\n", duration) +// fmt.Printf("Total results: %d\n", len(results)) +// for _, result := range results { +// fmt.Printf("Title: %s\nThumbnail: %s\nWidth: %d\nHeight: %d\nThumbProxy: %s\nSource (Original Image URL): %s\n\n", +// result.Title, result.Thumbnail, result.Width, result.Height, result.ThumbProxy, result.Source) +// } +// } diff --git a/images.go b/images.go index 13873af..d159165 100644 --- a/images.go +++ b/images.go @@ -13,7 +13,8 @@ var imageSearchEngines []SearchEngine func init() { imageSearchEngines = []SearchEngine{ {Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch), Weight: 1}, - {Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 2}, + {Name: "Bing", Func: wrapImageSearchFunc(PerformBingImageSearch), Weight: 2}, // Bing sometimes returns only a few images, which breaks dynamic page loading + {Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 3}, } }