diff --git a/common.go b/common.go new file mode 100644 index 0000000..d1de775 --- /dev/null +++ b/common.go @@ -0,0 +1,24 @@ +package main + +import ( + "html/template" +) + +var ( + debugMode bool = true + funcs = template.FuncMap{ + "sub": func(a, b int) int { + return a - b + }, + "add": func(a, b int) int { + return a + b + }, + } +) + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/get-searchxng.go b/get-searchxng.go index cb88261..b6da71a 100644 --- a/get-searchxng.go +++ b/get-searchxng.go @@ -128,10 +128,10 @@ func isInstanceValid(instance SearXInstance) bool { } } -func main() { - instance, err := getRandomSearXInstance() - if err != nil { - log.Fatalf("Failed to get a SearX instance: %v", err) - } - fmt.Printf("Selected SearX instance: %s\n", instance.URL) -} +// func main() { +// instance, err := getRandomSearXInstance() +// if err != nil { +// log.Fatalf("Failed to get a SearX instance: %v", err) +// } +// fmt.Printf("Selected SearX instance: %s\n", instance.URL) +// } diff --git a/go.mod b/go.mod index 4f6d6fa..63d4e99 100644 --- a/go.mod +++ b/go.mod @@ -2,8 +2,19 @@ module searchengine go 1.18 +require github.com/PuerkitoBio/goquery v1.9.1 // direct + require ( - github.com/PuerkitoBio/goquery v1.9.1 // direct github.com/andybalholm/cascadia v1.3.2 // indirect + github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 // indirect + github.com/chromedp/chromedp v0.9.5 // indirect + github.com/chromedp/sysutil v1.0.0 // indirect + github.com/gobwas/httphead v0.1.0 // indirect + github.com/gobwas/pool v0.2.1 // indirect + github.com/gobwas/ws v1.3.2 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect golang.org/x/net v0.21.0 // indirect + golang.org/x/sys v0.17.0 // indirect + golang.org/x/time v0.5.0 // indirect ) diff --git a/go.sum b/go.sum index f988942..77a830d 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,24 @@ github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VP github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 h1:XYUCaZrW8ckGWlCRJKCSoh/iFwlpX316a8yY9IFEzv8= +github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= +github.com/chromedp/chromedp v0.9.5 h1:viASzruPJOiThk7c5bueOUY91jGLJVximoEMGoH93rg= +github.com/chromedp/chromedp v0.9.5/go.mod h1:D4I2qONslauw/C7INoCir1BJkSwBYMyZgx8X276z3+Y= +github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic= +github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= +github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= +github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= +github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= +github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.3.2 h1:zlnbNHxumkRvfPWgfXu8RBwyNR1x8wh9cf5PTOCqs9Q= +github.com/gobwas/ws v1.3.2/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod 
h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= @@ -23,7 +41,11 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -33,6 +55,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= diff --git a/images-imgur.go b/images-imgur.go new file mode 100644 index 0000000..2e76879 --- /dev/null +++ b/images-imgur.go @@ -0,0 +1,143 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +// PerformImgurImageSearch performs an image search on Imgur and returns the results +func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { + var results []ImageSearchResult + searchURL := buildImgurSearchURL(query, page) + + resp, err := http.Get(searchURL) + if err != nil { + return nil, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("loading HTML 
document: %v", err) + } + + doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) { + thumbnailSrc, exists := s.Find("a img").Attr("src") + if !exists || len(thumbnailSrc) < 25 { + return + } + imgSrc := strings.Replace(thumbnailSrc, "b.", ".", 1) + + // Ensure the URLs have the correct protocol + if !strings.HasPrefix(thumbnailSrc, "http") { + thumbnailSrc = "https:" + thumbnailSrc + } + if !strings.HasPrefix(imgSrc, "http") { + imgSrc = "https:" + imgSrc + } + + urlPath, exists := s.Find("a").Attr("href") + if !exists { + return + } + + // Scrape the image directly from the Imgur page + imgSrc = scrapeImageFromImgurPage("https://imgur.com" + urlPath) + + // Remove any query parameters from the URL + imgSrc = removeQueryParameters(imgSrc) + + title, _ := s.Find("a img").Attr("alt") + + width, _ := strconv.Atoi(s.Find("a img").AttrOr("width", "0")) + height, _ := strconv.Atoi(s.Find("a img").AttrOr("height", "0")) + + results = append(results, ImageSearchResult{ + Thumbnail: thumbnailSrc, + Title: strings.TrimSpace(title), + Media: imgSrc, + Width: width, + Height: height, + Source: "https://imgur.com" + urlPath, + ThumbProxy: imgSrc, //"/img_proxy?url=" + url.QueryEscape(imgSrc) + }) + }) + + return results, nil +} + +// scrapeImageFromImgurPage scrapes the image source from the Imgur page +func scrapeImageFromImgurPage(pageURL string) string { + resp, err := http.Get(pageURL) + if err != nil { + fmt.Printf("Error fetching page: %v\n", err) + return "" + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Printf("Unexpected status code: %d\n", resp.StatusCode) + return "" + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + fmt.Printf("Error loading HTML document: %v\n", err) + return "" + } + + imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content") + if !exists { + fmt.Printf("Image not found on page: %s\n", pageURL) + return "" + } + + // Ensure the URL has the correct protocol + if !strings.HasPrefix(imgSrc, "http") { + imgSrc = "https:" + imgSrc + } + + return imgSrc +} + +// removeQueryParameters removes query parameters from a URL +func removeQueryParameters(rawURL string) string { + parsedURL, err := url.Parse(rawURL) + if err != nil { + fmt.Printf("Error parsing URL: %v\n", err) + return rawURL + } + parsedURL.RawQuery = "" + return parsedURL.String() +} + +func buildImgurSearchURL(query string, page int) string { + baseURL := "https://imgur.com/search/score/all" + params := url.Values{} + params.Add("q", query) + params.Add("qs", "thumbs") + params.Add("p", fmt.Sprintf("%d", page-1)) + return fmt.Sprintf("%s?%s", baseURL, params.Encode()) +} + +// func main() { +// results, err := PerformImgurImageSearch("cats", "true", "en", 1) +// if err != nil { +// fmt.Println("Error:", err) +// return +// } + +// for _, result := range results { +// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n", +// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height) +// } +// } diff --git a/images-quant.go b/images-quant.go new file mode 100644 index 0000000..fa799f8 --- /dev/null +++ b/images-quant.go @@ -0,0 +1,95 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "time" +) + +// QwantAPIResponse represents the JSON response structure from Qwant API +type QwantAPIResponse struct { + Data struct { + Result struct { + Items []struct { + Media string `json:"media"` + Thumbnail 
string `json:"thumbnail"` + Title string `json:"title"` + Url string `json:"url"` + Width int `json:"width"` + Height int `json:"height"` + } `json:"items"` + } `json:"result"` + } `json:"data"` +} + +// PerformQwantImageSearch performs an image search on Qwant and returns the results. +func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { + const resultsPerPage = 50 + var offset int + if page <= 1 { + offset = 0 + } else { + offset = (page - 1) * resultsPerPage + } + + if safe == "" { + safe = "0" + } + + if lang == "" { + lang = "en_CA" + } + + apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s", + url.QueryEscape(query), + resultsPerPage, + lang, + offset, + safe) + + client := &http.Client{Timeout: 10 * time.Second} + + req, err := http.NewRequest("GET", apiURL, nil) + if err != nil { + return nil, fmt.Errorf("creating request: %v", err) + } + + ImageUserAgent, err := GetUserAgent("Image-Search") + if err != nil { + return nil, err + } + + req.Header.Set("User-Agent", ImageUserAgent) + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + var apiResp QwantAPIResponse + if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { + return nil, fmt.Errorf("decoding response: %v", err) + } + + var results []ImageSearchResult + for _, item := range apiResp.Data.Result.Items { + results = append(results, ImageSearchResult{ + Thumbnail: item.Thumbnail, + Title: item.Title, + Media: item.Media, + Source: item.Url, + ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media), + Width: item.Width, + Height: item.Height, + }) + } + + return results, nil +} diff --git a/images.go b/images.go index 16e8581..f235a85 100644 --- a/images.go +++ b/images.go @@ -1,120 +1,35 @@ package main import ( - "encoding/json" "fmt" "html/template" "log" + "math/rand" "net/http" - "net/url" + "sync" "time" ) -// QwantAPIResponse represents the JSON response structure from Qwant API -type QwantAPIResponse struct { - Data struct { - Result struct { - Items []struct { - Media string `json:"media"` - Thumbnail string `json:"thumbnail"` - Title string `json:"title"` - Url string `json:"url"` - Width int `json:"width"` - Height int `json:"height"` - } `json:"items"` - } `json:"result"` - } `json:"data"` +var ( + imageEngines []imageEngine + imageEngineLock sync.Mutex +) + +type imageEngine struct { + Name string + Func func(string, string, string, int) ([]ImageSearchResult, error) + Weight int } -var funcs = template.FuncMap{ - "sub": func(a, b int) int { - return a - b - }, - "add": func(a, b int) int { - return a + b - }, +func init() { + imageEngines = []imageEngine{ + {Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1}, + {Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2}, + } + + rand.Seed(time.Now().UnixNano()) } -// FetchImageResults contacts the image search API and returns a slice of ImageSearchResult -func fetchImageResults(query string, safe, lang string, page int) ([]ImageSearchResult, error) { - const resultsPerPage = 50 - var offset int - if page <= 1 { - offset = 0 - } else { - offset = (page - 1) * resultsPerPage - } - - // Ensuring safe search is disabled by default if not specified - if safe == "" { - safe = "0" - } - - // Defaulting to English Canada locale 
if not specified - if lang == "" { - lang = "en_CA" - } - - // Format &lang=lang_de is incorrect, implement fix ! - apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s", - url.QueryEscape(query), - resultsPerPage, - lang, - offset, - safe) - - client := &http.Client{Timeout: 10 * time.Second} - - req, err := http.NewRequest("GET", apiURL, nil) - if err != nil { - return nil, fmt.Errorf("creating request: %v", err) - } - - // User Agent generation - ImageUserAgent, err := GetUserAgent("Image-Search") - if err != nil { - fmt.Println("Error:", err) - return nil, err - } - - if debugMode { - fmt.Println("Generated User Agent (images):", ImageUserAgent) - } - - req.Header.Set("User-Agent", ImageUserAgent) - - resp, err := client.Do(req) - if err != nil { - return nil, fmt.Errorf("making request: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) - } - - var apiResp QwantAPIResponse - if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { - return nil, fmt.Errorf("decoding response: %v", err) - } - - var results []ImageSearchResult - for _, item := range apiResp.Data.Result.Items { - results = append(results, ImageSearchResult{ - Thumbnail: item.Thumbnail, // Thumbnail URL - Title: item.Title, // Image title - Media: item.Media, // Direct link to the image - Source: item.Url, - ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media), - Width: item.Width, - Height: item.Height, - }) - } - - return results, nil -} - -// HandleImageSearch is the HTTP handler for image search requests func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) { startTime := time.Now() @@ -174,31 +89,58 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string select { case results := <-cacheChan: if results == nil { - combinedResults = fetchAndCacheImageResults(query, safe, lang, page) + combinedResults = fetchImageResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } else { _, _, imageResults := convertToSpecificResults(results) combinedResults = imageResults } case <-time.After(2 * time.Second): log.Println("Cache check timeout") - combinedResults = fetchAndCacheImageResults(query, safe, lang, page) + combinedResults = fetchImageResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } return combinedResults } -func fetchAndCacheImageResults(query, safe, lang string, page int) []ImageSearchResult { - results, err := fetchImageResults(query, safe, lang, page) - if err != nil || len(results) == 0 { - log.Printf("Error fetching image results: %v", err) - return []ImageSearchResult{ - {Title: "Results are currently unavailable, sorry. 
Please try again later."}, - } - } +func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { + engine := selectImageEngine() + log.Printf("Using image search engine: %s", engine.Name) - // Cache the valid results - cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "image"} - resultsCache.Set(cacheKey, convertToSearchResults(results)) + results, err := engine.Func(query, safe, lang, page) + if err != nil { + log.Printf("Error performing image search with %s: %v", engine.Name, err) + return nil + } return results } + +func selectImageEngine() imageEngine { + imageEngineLock.Lock() + defer imageEngineLock.Unlock() + + totalWeight := 0 + for _, engine := range imageEngines { + totalWeight += engine.Weight + } + + randValue := rand.Intn(totalWeight) + for _, engine := range imageEngines { + if randValue < engine.Weight { + // Adjust weights for load balancing + for i := range imageEngines { + if imageEngines[i].Name == engine.Name { + imageEngines[i].Weight = max(1, imageEngines[i].Weight-1) + } else { + imageEngines[i].Weight++ + } + } + return engine + } + randValue -= engine.Weight + } + + return imageEngines[0] // fallback to the first engine +} diff --git a/run.sh b/run.sh index 9b6d4d8..9fa41ee 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ #!/bin/bash -go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go --debug \ No newline at end of file +go run main.go common.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file diff --git a/templates/text.html b/templates/text.html index eef8e4c..408c595 100644 --- a/templates/text.html +++ b/templates/text.html @@ -56,7 +56,7 @@ -
{{if .Results}}
{{range .Results}}
@@ -70,7 +70,7 @@
No results found for '{{ .Query }}'. Try different keywords.
{{end}}
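The templates/text.html hunks above survive only as bare template actions, but taken together with the `sub`/`add` helpers registered in common.go and the page/hasPrevPage/hasNextPage values that displayResults passes to the template, they point at prev/next pagination links. Below is a minimal, self-contained sketch of how those helpers can drive such links; the field names (.Query, .Page, .HasPrevPage, .HasNextPage) and the markup are assumptions for illustration, not the file's actual contents.

package main

import (
	"html/template"
	"os"
)

func main() {
	// Same FuncMap as common.go in this diff.
	funcs := template.FuncMap{
		"sub": func(a, b int) int { return a - b },
		"add": func(a, b int) int { return a + b },
	}

	// Hypothetical pagination fragment; not the real templates/text.html markup.
	nav := template.Must(template.New("nav").Funcs(funcs).Parse(
		`{{if .HasPrevPage}}<a href="/search?q={{.Query}}&p={{sub .Page 1}}">Previous</a>{{end}}
{{if .HasNextPage}}<a href="/search?q={{.Query}}&p={{add .Page 1}}">Next</a>{{end}}`))

	data := map[string]interface{}{
		"Query":       "cats",
		"Page":        2,
		"HasPrevPage": true,
		"HasNextPage": true,
	}
	if err := nav.Execute(os.Stdout, data); err != nil {
		panic(err)
	}
}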
@@ -83,8 +83,35 @@
diff --git a/text-duckduckgo.go b/text-duckduckgo.go index 881256b..56d098f 100644 --- a/text-duckduckgo.go +++ b/text-duckduckgo.go @@ -1,3 +1,4 @@ +// text-duckduckgo.go package main import ( @@ -6,66 +7,15 @@ import ( "net/http" "net/url" "strings" - "time" "github.com/PuerkitoBio/goquery" ) -const ( - resultsPerPage = 10 -) - -func getVQD(query string) (string, error) { - queryURL := fmt.Sprintf("https://duckduckgo.com/?q=%s", url.QueryEscape(query)) - resp, err := http.Get(queryURL) - if err != nil { - return "", fmt.Errorf("failed to fetch vqd: %v", err) - } - defer resp.Body.Close() - - doc, err := goquery.NewDocumentFromReader(resp.Body) - if err != nil { - return "", fmt.Errorf("loading HTML document: %v", err) - } - - var vqd string - doc.Find("script").Each(func(i int, s *goquery.Selection) { - text := s.Text() - if strings.Contains(text, "vqd=\"") { - start := strings.Index(text, "vqd=\"") + 5 - end := strings.Index(text[start:], "\"") - vqd = text[start : start+end] - } - }) - - if vqd == "" { - return "", fmt.Errorf("vqd not found") - } - - return vqd, nil -} - func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { var results []TextSearchResult + searchURL := buildDuckDuckGoSearchURL(query, page) - client := &http.Client{Timeout: 10 * time.Second} - - vqd, err := getVQD(query) - if err != nil { - return nil, fmt.Errorf("failed to get vqd: %v", err) - } - - searchURL := fmt.Sprintf("https://duckduckgo.com/html/?q=%s&kl=%s&safe=%s&s=%d&vqd=%s", - url.QueryEscape(query), lang, safe, (page-1)*resultsPerPage, vqd) - - req, err := http.NewRequest("GET", searchURL, nil) - if err != nil { - return nil, fmt.Errorf("failed to create request: %v", err) - } - - req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36") - - resp, err := client.Do(req) + resp, err := http.Get(searchURL) if err != nil { return nil, fmt.Errorf("making request: %v", err) } @@ -94,34 +44,23 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear URL: uddg, Header: strings.TrimSpace(header), Description: strings.TrimSpace(description), - Source: "DuckDuckGo", } results = append(results, result) if debugMode { log.Printf("Processed DuckDuckGo result: %+v\n", result) } - } else { - if debugMode { - log.Printf("Missing 'uddg' parameter in URL: %s\n", rawURL) - } } - } else { - if debugMode { - log.Printf("Error parsing URL: %s, error: %v\n", rawURL, err) - } - } - } else { - if debugMode { - log.Printf("Missing 'href' attribute in result anchor tag\n") } } }) - if len(results) == 0 { - if debugMode { - log.Println("No results found from DuckDuckGo") - } - } - return results, nil } + +func buildDuckDuckGoSearchURL(query string, page int) string { + startParam := "" + if page > 1 { + startParam = fmt.Sprintf("&s=%d", (page-1)*10) + } + return fmt.Sprintf("https://duckduckgo.com/html/?q=%s%s", url.QueryEscape(query), startParam) +} \ No newline at end of file diff --git a/text-google.go b/text-google.go index 9c338cc..971c407 100644 --- a/text-google.go +++ b/text-google.go @@ -1,62 +1,47 @@ package main import ( + "context" "fmt" - "log" - "net/http" "net/url" "strings" + "time" "github.com/PuerkitoBio/goquery" + "github.com/chromedp/chromedp" ) func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { - const resultsPerPage = 10 + opts := append(chromedp.DefaultExecAllocatorOptions[:], + chromedp.DisableGPU, + 
chromedp.NoDefaultBrowserCheck, + chromedp.NoFirstRun, + chromedp.Flag("disable-javascript", true), + ) + ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) + defer cancel() + + ctx, cancel = chromedp.NewContext(ctx) + defer cancel() + var results []TextSearchResult - client := &http.Client{} - searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) - - req, err := http.NewRequest("GET", searchURL, nil) + searchURL := buildSearchURL(query, safe, lang, page, 10) + var pageSource string + err := chromedp.Run(ctx, + chromedp.Navigate(searchURL), + chromedp.Sleep(2*time.Second), + chromedp.OuterHTML("html", &pageSource), + ) if err != nil { - return nil, fmt.Errorf("failed to create request: %v", err) + return nil, fmt.Errorf("failed to retrieve page source: %v", err) } - // User Agent generation - TextUserAgent, err := GetUserAgent("Text-Search") + newResults, err := parseResults(pageSource) if err != nil { - fmt.Println("Error:", err) - return nil, err - } - - if debugMode { - fmt.Println("Generated User Agent (text):", TextUserAgent) - } - - req.Header.Set("User-Agent", TextUserAgent) - - resp, err := client.Do(req) - if err != nil { - return nil, fmt.Errorf("making request: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) - } - - doc, err := goquery.NewDocumentFromReader(resp.Body) - if err != nil { - return nil, fmt.Errorf("loading HTML document: %v", err) - } - - results = parseResults(doc) - - if len(results) == 0 { - if debugMode { - log.Println("No results found from Google") - } + return nil, fmt.Errorf("error parsing results: %v", err) } + results = append(results, newResults...) return results, nil } @@ -72,20 +57,23 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string { langParam = "&lr=" + lang } - startIndex := (page - 1) * resultsPerPage - return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&udm=14&start=%d", url.QueryEscape(query), safeParam, langParam, startIndex) + startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage) + + return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam) } -func parseResults(doc *goquery.Document) []TextSearchResult { +func parseResults(pageSource string) ([]TextSearchResult, error) { var results []TextSearchResult + doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource)) + if err != nil { + return nil, fmt.Errorf("loading HTML document: %v", err) + } + doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) { link := s.Find("a") href, exists := link.Attr("href") if !exists { - if debugMode { - log.Printf("No href attribute found for result %d\n", i) - } return } @@ -104,10 +92,7 @@ func parseResults(doc *goquery.Document) []TextSearchResult { Description: description, } results = append(results, result) - if debugMode { - log.Printf("Google result: %+v\n", result) - } }) - return results + return results, nil } diff --git a/text-librex.go b/text-librex.go index 450f20d..526d7e8 100644 --- a/text-librex.go +++ b/text-librex.go @@ -20,7 +20,7 @@ type LibreXResponse []LibreXResult func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { // LibreX uses page starting from 0 - searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page-1) + searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", 
LIBREX_DOMAIN, url.QueryEscape(query), page) // User Agent generation userAgent, err := GetUserAgent("librex-text-search") @@ -63,10 +63,6 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe Source: "LibreX", } - if debugMode { - log.Printf("LibreX result: %+v\n", result) - } - results = append(results, result) } diff --git a/text-quant.go b/text-quant.go index c090ffe..de8b03a 100644 --- a/text-quant.go +++ b/text-quant.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" "fmt" - "log" "net/http" "net/url" "time" @@ -27,11 +26,9 @@ type QwantTextAPIResponse struct { } // PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult -func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { +func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error) { const resultsPerPage = 10 - - // Calculate the offset based on the page number - offset := (page - 1) * resultsPerPage + const offset = 0 // Ensure safe search is disabled by default if not specified if safe == "" { @@ -43,12 +40,11 @@ func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchRes lang = "en_CA" } - apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop&safesearch=%s", + apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop", url.QueryEscape(query), resultsPerPage, lang, - offset, - safe) + offset) client := &http.Client{Timeout: 10 * time.Second} @@ -97,9 +93,6 @@ func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchRes func cleanQwantURL(rawURL string) string { u, err := url.Parse(rawURL) if err != nil { - if debugMode { - log.Printf("Error parsing URL: %v", err) - } return rawURL } return u.Scheme + "://" + u.Host + u.Path diff --git a/text.go b/text.go index 6845fab..4cc1e0b 100644 --- a/text.go +++ b/text.go @@ -1,44 +1,56 @@ package main import ( - "flag" "fmt" "html/template" "log" + "math/rand" "net/http" - "sort" "sync" "time" ) var ( - debugMode bool + searchEngines []searchEngine + searchEngineLock sync.Mutex ) +type searchEngine struct { + Name string + Func func(string, string, string, int) ([]TextSearchResult, error) + Weight int +} + func init() { - flag.BoolVar(&debugMode, "debug", false, "enable debug mode") - flag.Parse() + searchEngines = []searchEngine{ + {Name: "Google", Func: PerformGoogleTextSearch, Weight: 1}, + {Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2}, + // {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash + // {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented + } + + rand.Seed(time.Now().UnixNano()) } func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) { startTime := time.Now() - const resultsPerPage = 10 cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"} - combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page, resultsPerPage) + combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page) hasPrevPage := page > 1 - hasNextPage := len(combinedResults) == resultsPerPage + hasNextPage := len(combinedResults) > 0 displayResults(w, combinedResults, query, lang, time.Since(startTime).Seconds(), page, hasPrevPage, hasNextPage) - // Always check and cache the next page if not enough 
results - if hasNextPage { - go cacheNextPageIfNotCached(query, safe, lang, page+1, resultsPerPage) + // Prefetch next and previous pages + go prefetchPage(query, safe, lang, page+1) + if hasPrevPage { + go prefetchPage(query, safe, lang, page-1) } } -func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page, resultsPerPage int) []TextSearchResult { +func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page int) []TextSearchResult { cacheChan := make(chan []SearchResult) var combinedResults []TextSearchResult @@ -56,7 +68,7 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, select { case results := <-cacheChan: if results == nil { - combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage) + combinedResults = fetchTextResults(query, safe, lang, page) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } else { textResults, _, _ := convertToSpecificResults(results) @@ -64,129 +76,63 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, } case <-time.After(2 * time.Second): log.Println("Cache check timeout") - combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage) + combinedResults = fetchTextResults(query, safe, lang, page) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } return combinedResults } -func cacheNextPageIfNotCached(query, safe, lang string, page, resultsPerPage int) { +func prefetchPage(query, safe, lang string, page int) { cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"} if _, exists := resultsCache.Get(cacheKey); !exists { - log.Printf("Next page %d not cached, caching now...", page) - nextPageResults := fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage) - resultsCache.Set(cacheKey, convertToSearchResults(nextPageResults)) + log.Printf("Page %d not cached, caching now...", page) + pageResults := fetchTextResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(pageResults)) } else { - log.Printf("Next page %d already cached", page) + log.Printf("Page %d already cached", page) } } -func fetchTextResultsUntilFull(query, safe, lang string, targetPage, resultsPerPage int) []TextSearchResult { - var combinedResults []TextSearchResult - currentPage := 1 - resultsNeeded := targetPage * resultsPerPage +func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { + engine := selectSearchEngine() + log.Printf("Using search engine: %s", engine.Name) - for len(combinedResults) < resultsNeeded { - cacheKey := CacheKey{Query: query, Page: targetPage, Safe: safe == "true", Lang: lang, Type: "text"} - cachedResults, exists := resultsCache.Get(cacheKey) - if exists { - textResults, _, _ := convertToSpecificResults(cachedResults) - combinedResults = append(combinedResults, textResults...) - } else { - results := fetchAndCacheTextResults(query, safe, lang, currentPage, resultsPerPage) - if len(results) == 0 { - break - } - combinedResults = append(combinedResults, results...) 
- resultsCache.Set(cacheKey, convertToSearchResults(results)) - } - - currentPage++ - - // Stop fetching if we have enough results for the target page and the next page - if len(combinedResults) >= resultsNeeded+resultsPerPage { - break - } + results, err := engine.Func(query, safe, lang, page) + if err != nil { + log.Printf("Error performing search with %s: %v", engine.Name, err) + return nil } - startIndex := (targetPage - 1) * resultsPerPage - endIndex := startIndex + resultsPerPage - - if startIndex >= len(combinedResults) { - return []TextSearchResult{} - } - if endIndex > len(combinedResults) { - endIndex = len(combinedResults) - } - - return combinedResults[startIndex:endIndex] + return results } -func fetchAndCacheTextResults(query, safe, lang string, page, resultsPerPage int) []TextSearchResult { - var combinedResults []TextSearchResult - var wg sync.WaitGroup - var mu sync.Mutex +func selectSearchEngine() searchEngine { + searchEngineLock.Lock() + defer searchEngineLock.Unlock() - resultsChan := make(chan []TextSearchResult) - - searchFuncs := []struct { - Func func(string, string, string, int) ([]TextSearchResult, error) - Source string - }{ - {PerformGoogleTextSearch, "Google"}, - {PerformLibreXTextSearch, "LibreX"}, - // {PerformSearXNGTextSearch, "SearXNG"}, + totalWeight := 0 + for _, engine := range searchEngines { + totalWeight += engine.Weight } - wg.Add(len(searchFuncs)) - - for _, searchFunc := range searchFuncs { - go func(searchFunc func(string, string, string, int) ([]TextSearchResult, error), source string) { - defer wg.Done() - results, err := searchFunc(query, safe, lang, page) - if err == nil { - for i := range results { - results[i].Source = source + randValue := rand.Intn(totalWeight) + for _, engine := range searchEngines { + if randValue < engine.Weight { + // Adjust weights for load balancing + for i := range searchEngines { + if searchEngines[i].Name == engine.Name { + searchEngines[i].Weight = max(1, searchEngines[i].Weight-1) + } else { + searchEngines[i].Weight++ } - resultsChan <- results - } else { - log.Printf("Error performing search from %s: %v", source, err) } - }(searchFunc.Func, searchFunc.Source) + return engine + } + randValue -= engine.Weight } - go func() { - wg.Wait() - close(resultsChan) - }() - - for results := range resultsChan { - mu.Lock() - combinedResults = append(combinedResults, results...) - mu.Unlock() - } - - sort.SliceStable(combinedResults, func(i, j int) bool { - return sourceOrder(combinedResults[i].Source) < sourceOrder(combinedResults[j].Source) - }) - - log.Printf("Fetched %d results for page %d", len(combinedResults), page) - - return combinedResults -} - -func sourceOrder(source string) int { - switch source { - case "Google": - return 1 - case "LibreX": - return 2 - case "SearchXNG": - return 3 - default: - return 4 - } + return searchEngines[0] // fallback to the first engine } func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {
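The weighted engine selection introduced in text.go and images.go deserves a close read: selectSearchEngine and selectImageEngine draw an engine with probability proportional to its weight, then decrement the winner's weight (floored at 1) and increment every other engine's, so successive queries spread across engines instead of sticking to the heaviest one. Below is a standalone simulation of that policy using the same starting weights as text.go (Google 1, LibreX 2); the engine/pick/maxInt names and the 10,000-draw loop are illustration only, not part of the diff.

package main

import (
	"fmt"
	"math/rand"
)

type engine struct {
	name   string
	weight int
}

// pick mirrors the weighted draw and rebalancing in text.go/images.go:
// the winner is chosen proportionally to its weight, then penalized so
// load evens out over successive calls.
func pick(engines []engine) int {
	total := 0
	for _, e := range engines {
		total += e.weight
	}
	r := rand.Intn(total)
	for i := range engines {
		if r < engines[i].weight {
			for j := range engines {
				if j == i {
					engines[j].weight = maxInt(1, engines[j].weight-1)
				} else {
					engines[j].weight++
				}
			}
			return i
		}
		r -= engines[i].weight
	}
	return len(engines) - 1 // unreachable fallback
}

func maxInt(a, b int) int {
	if a > b {
		return a
	}
	return b
}

func main() {
	engines := []engine{{name: "Google", weight: 1}, {name: "LibreX", weight: 2}}
	counts := map[string]int{}
	for n := 0; n < 10000; n++ {
		counts[engines[pick(engines)].name]++
	}
	// Despite the 1:2 starting weights, the rebalancing pulls the
	// long-run split toward roughly even.
	fmt.Println(counts)
}

Because weights are floored at 1, no engine is ever starved entirely; the rebalancing only biases the next draw toward whichever engines were picked least recently.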