merge work repo to main #1
14 changed files with 504 additions and 379 deletions
24 common.go Normal file
@@ -0,0 +1,24 @@
package main

import (
    "html/template"
)

var (
    debugMode bool = true
    funcs          = template.FuncMap{
        "sub": func(a, b int) int {
            return a - b
        },
        "add": func(a, b int) int {
            return a + b
        },
    }
)

func max(a, b int) int {
    if a > b {
        return a
    }
    return b
}

@@ -128,10 +128,10 @@ func isInstanceValid(instance SearXInstance) bool {
    }
}

func main() {
    instance, err := getRandomSearXInstance()
    if err != nil {
        log.Fatalf("Failed to get a SearX instance: %v", err)
    }
    fmt.Printf("Selected SearX instance: %s\n", instance.URL)
}
// func main() {
// 	instance, err := getRandomSearXInstance()
// 	if err != nil {
// 		log.Fatalf("Failed to get a SearX instance: %v", err)
// 	}
// 	fmt.Printf("Selected SearX instance: %s\n", instance.URL)
// }

13 go.mod
@@ -2,8 +2,19 @@ module searchengine

go 1.18

require github.com/PuerkitoBio/goquery v1.9.1 // direct

require (
    github.com/PuerkitoBio/goquery v1.9.1 // direct
    github.com/andybalholm/cascadia v1.3.2 // indirect
    github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 // indirect
    github.com/chromedp/chromedp v0.9.5 // indirect
    github.com/chromedp/sysutil v1.0.0 // indirect
    github.com/gobwas/httphead v0.1.0 // indirect
    github.com/gobwas/pool v0.2.1 // indirect
    github.com/gobwas/ws v1.3.2 // indirect
    github.com/josharian/intern v1.0.0 // indirect
    github.com/mailru/easyjson v0.7.7 // indirect
    golang.org/x/net v0.21.0 // indirect
    golang.org/x/sys v0.17.0 // indirect
    golang.org/x/time v0.5.0 // indirect
)

24 go.sum
@@ -2,6 +2,24 @@ github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VP
github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 h1:XYUCaZrW8ckGWlCRJKCSoh/iFwlpX316a8yY9IFEzv8=
github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs=
github.com/chromedp/chromedp v0.9.5 h1:viASzruPJOiThk7c5bueOUY91jGLJVximoEMGoH93rg=
github.com/chromedp/chromedp v0.9.5/go.mod h1:D4I2qONslauw/C7INoCir1BJkSwBYMyZgx8X276z3+Y=
github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic=
github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww=
github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU=
github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM=
github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og=
github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw=
github.com/gobwas/ws v1.3.2 h1:zlnbNHxumkRvfPWgfXu8RBwyNR1x8wh9cf5PTOCqs9Q=
github.com/gobwas/ws v1.3.2/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
@@ -23,7 +41,11 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -33,6 +55,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=

143 images-imgur.go Normal file
@@ -0,0 +1,143 @@
package main

import (
    "fmt"
    "net/http"
    "net/url"
    "strconv"
    "strings"

    "github.com/PuerkitoBio/goquery"
)

// PerformImgurImageSearch performs an image search on Imgur and returns the results
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) {
    var results []ImageSearchResult
    searchURL := buildImgurSearchURL(query, page)

    resp, err := http.Get(searchURL)
    if err != nil {
        return nil, fmt.Errorf("making request: %v", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
    }

    doc, err := goquery.NewDocumentFromReader(resp.Body)
    if err != nil {
        return nil, fmt.Errorf("loading HTML document: %v", err)
    }

    doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) {
        thumbnailSrc, exists := s.Find("a img").Attr("src")
        if !exists || len(thumbnailSrc) < 25 {
            return
        }
        imgSrc := strings.Replace(thumbnailSrc, "b.", ".", 1)

        // Ensure the URLs have the correct protocol
        if !strings.HasPrefix(thumbnailSrc, "http") {
            thumbnailSrc = "https:" + thumbnailSrc
        }
        if !strings.HasPrefix(imgSrc, "http") {
            imgSrc = "https:" + imgSrc
        }

        urlPath, exists := s.Find("a").Attr("href")
        if !exists {
            return
        }

        // Scrape the image directly from the Imgur page
        imgSrc = scrapeImageFromImgurPage("https://imgur.com" + urlPath)

        // Remove any query parameters from the URL
        imgSrc = removeQueryParameters(imgSrc)

        title, _ := s.Find("a img").Attr("alt")

        width, _ := strconv.Atoi(s.Find("a img").AttrOr("width", "0"))
        height, _ := strconv.Atoi(s.Find("a img").AttrOr("height", "0"))

        results = append(results, ImageSearchResult{
            Thumbnail:  thumbnailSrc,
            Title:      strings.TrimSpace(title),
            Media:      imgSrc,
            Width:      width,
            Height:     height,
            Source:     "https://imgur.com" + urlPath,
            ThumbProxy: imgSrc, //"/img_proxy?url=" + url.QueryEscape(imgSrc)
        })
    })

    return results, nil
}

// scrapeImageFromImgurPage scrapes the image source from the Imgur page
func scrapeImageFromImgurPage(pageURL string) string {
    resp, err := http.Get(pageURL)
    if err != nil {
        fmt.Printf("Error fetching page: %v\n", err)
        return ""
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        fmt.Printf("Unexpected status code: %d\n", resp.StatusCode)
        return ""
    }

    doc, err := goquery.NewDocumentFromReader(resp.Body)
    if err != nil {
        fmt.Printf("Error loading HTML document: %v\n", err)
        return ""
    }

    imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content")
    if !exists {
        fmt.Printf("Image not found on page: %s\n", pageURL)
        return ""
    }

    // Ensure the URL has the correct protocol
    if !strings.HasPrefix(imgSrc, "http") {
        imgSrc = "https:" + imgSrc
    }

    return imgSrc
}

// removeQueryParameters removes query parameters from a URL
func removeQueryParameters(rawURL string) string {
    parsedURL, err := url.Parse(rawURL)
    if err != nil {
        fmt.Printf("Error parsing URL: %v\n", err)
        return rawURL
    }
    parsedURL.RawQuery = ""
    return parsedURL.String()
}

func buildImgurSearchURL(query string, page int) string {
    baseURL := "https://imgur.com/search/score/all"
    params := url.Values{}
    params.Add("q", query)
    params.Add("qs", "thumbs")
    params.Add("p", fmt.Sprintf("%d", page-1))
    return fmt.Sprintf("%s?%s", baseURL, params.Encode())
}

// func main() {
// 	results, err := PerformImgurImageSearch("cats", "true", "en", 1)
// 	if err != nil {
// 		fmt.Println("Error:", err)
// 		return
// 	}

// 	for _, result := range results {
// 		fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n",
// 			result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height)
// 	}
// }

95 images-quant.go Normal file
@@ -0,0 +1,95 @@
package main

import (
    "encoding/json"
    "fmt"
    "net/http"
    "net/url"
    "time"
)

// QwantAPIResponse represents the JSON response structure from Qwant API
type QwantAPIResponse struct {
    Data struct {
        Result struct {
            Items []struct {
                Media     string `json:"media"`
                Thumbnail string `json:"thumbnail"`
                Title     string `json:"title"`
                Url       string `json:"url"`
                Width     int    `json:"width"`
                Height    int    `json:"height"`
            } `json:"items"`
        } `json:"result"`
    } `json:"data"`
}

// PerformQwantImageSearch performs an image search on Qwant and returns the results.
func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) {
    const resultsPerPage = 50
    var offset int
    if page <= 1 {
        offset = 0
    } else {
        offset = (page - 1) * resultsPerPage
    }

    if safe == "" {
        safe = "0"
    }

    if lang == "" {
        lang = "en_CA"
    }

    apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s",
        url.QueryEscape(query),
        resultsPerPage,
        lang,
        offset,
        safe)

    client := &http.Client{Timeout: 10 * time.Second}

    req, err := http.NewRequest("GET", apiURL, nil)
    if err != nil {
        return nil, fmt.Errorf("creating request: %v", err)
    }

    ImageUserAgent, err := GetUserAgent("Image-Search")
    if err != nil {
        return nil, err
    }

    req.Header.Set("User-Agent", ImageUserAgent)

    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("making request: %v", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
    }

    var apiResp QwantAPIResponse
    if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
        return nil, fmt.Errorf("decoding response: %v", err)
    }

    var results []ImageSearchResult
    for _, item := range apiResp.Data.Result.Items {
        results = append(results, ImageSearchResult{
            Thumbnail:  item.Thumbnail,
            Title:      item.Title,
            Media:      item.Media,
            Source:     item.Url,
            ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media),
            Width:      item.Width,
            Height:     item.Height,
        })
    }

    return results, nil
}

174 images.go
@@ -1,120 +1,35 @@
package main

import (
    "encoding/json"
    "fmt"
    "html/template"
    "log"
    "math/rand"
    "net/http"
    "net/url"
    "sync"
    "time"
)

// QwantAPIResponse represents the JSON response structure from Qwant API
type QwantAPIResponse struct {
    Data struct {
        Result struct {
            Items []struct {
                Media     string `json:"media"`
                Thumbnail string `json:"thumbnail"`
                Title     string `json:"title"`
                Url       string `json:"url"`
                Width     int    `json:"width"`
                Height    int    `json:"height"`
            } `json:"items"`
        } `json:"result"`
    } `json:"data"`
var (
    imageEngines    []imageEngine
    imageEngineLock sync.Mutex
)

type imageEngine struct {
    Name   string
    Func   func(string, string, string, int) ([]ImageSearchResult, error)
    Weight int
}

var funcs = template.FuncMap{
    "sub": func(a, b int) int {
        return a - b
    },
    "add": func(a, b int) int {
        return a + b
    },
func init() {
    imageEngines = []imageEngine{
        {Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1},
        {Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2},
    }

    rand.Seed(time.Now().UnixNano())
}

// FetchImageResults contacts the image search API and returns a slice of ImageSearchResult
func fetchImageResults(query string, safe, lang string, page int) ([]ImageSearchResult, error) {
    const resultsPerPage = 50
    var offset int
    if page <= 1 {
        offset = 0
    } else {
        offset = (page - 1) * resultsPerPage
    }

    // Ensuring safe search is disabled by default if not specified
    if safe == "" {
        safe = "0"
    }

    // Defaulting to English Canada locale if not specified
    if lang == "" {
        lang = "en_CA"
    }

    // Format &lang=lang_de is incorrect, implement fix !
    apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s",
        url.QueryEscape(query),
        resultsPerPage,
        lang,
        offset,
        safe)

    client := &http.Client{Timeout: 10 * time.Second}

    req, err := http.NewRequest("GET", apiURL, nil)
    if err != nil {
        return nil, fmt.Errorf("creating request: %v", err)
    }

    // User Agent generation
    ImageUserAgent, err := GetUserAgent("Image-Search")
    if err != nil {
        fmt.Println("Error:", err)
        return nil, err
    }

    if debugMode {
        fmt.Println("Generated User Agent (images):", ImageUserAgent)
    }

    req.Header.Set("User-Agent", ImageUserAgent)

    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("making request: %v", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
    }

    var apiResp QwantAPIResponse
    if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
        return nil, fmt.Errorf("decoding response: %v", err)
    }

    var results []ImageSearchResult
    for _, item := range apiResp.Data.Result.Items {
        results = append(results, ImageSearchResult{
            Thumbnail:  item.Thumbnail, // Thumbnail URL
            Title:      item.Title,     // Image title
            Media:      item.Media,     // Direct link to the image
            Source:     item.Url,
            ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media),
            Width:      item.Width,
            Height:     item.Height,
        })
    }

    return results, nil
}

// HandleImageSearch is the HTTP handler for image search requests
func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) {
    startTime := time.Now()

@@ -174,31 +89,58 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
    select {
    case results := <-cacheChan:
        if results == nil {
            combinedResults = fetchAndCacheImageResults(query, safe, lang, page)
            combinedResults = fetchImageResults(query, safe, lang, page)
            resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
        } else {
            _, _, imageResults := convertToSpecificResults(results)
            combinedResults = imageResults
        }
    case <-time.After(2 * time.Second):
        log.Println("Cache check timeout")
        combinedResults = fetchAndCacheImageResults(query, safe, lang, page)
        combinedResults = fetchImageResults(query, safe, lang, page)
        resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
    }

    return combinedResults
}

func fetchAndCacheImageResults(query, safe, lang string, page int) []ImageSearchResult {
    results, err := fetchImageResults(query, safe, lang, page)
    if err != nil || len(results) == 0 {
        log.Printf("Error fetching image results: %v", err)
        return []ImageSearchResult{
            {Title: "Results are currently unavailable, sorry. Please try again later."},
        }
    }
func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult {
    engine := selectImageEngine()
    log.Printf("Using image search engine: %s", engine.Name)

    // Cache the valid results
    cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "image"}
    resultsCache.Set(cacheKey, convertToSearchResults(results))
    results, err := engine.Func(query, safe, lang, page)
    if err != nil {
        log.Printf("Error performing image search with %s: %v", engine.Name, err)
        return nil
    }

    return results
}

func selectImageEngine() imageEngine {
    imageEngineLock.Lock()
    defer imageEngineLock.Unlock()

    totalWeight := 0
    for _, engine := range imageEngines {
        totalWeight += engine.Weight
    }

    randValue := rand.Intn(totalWeight)
    for _, engine := range imageEngines {
        if randValue < engine.Weight {
            // Adjust weights for load balancing
            for i := range imageEngines {
                if imageEngines[i].Name == engine.Name {
                    imageEngines[i].Weight = max(1, imageEngines[i].Weight-1)
                } else {
                    imageEngines[i].Weight++
                }
            }
            return engine
        }
        randValue -= engine.Weight
    }

    return imageEngines[0] // fallback to the first engine
}

2 run.sh
@@ -1,3 +1,3 @@
#!/bin/bash

go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go --debug
go run main.go common.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go

@@ -56,7 +56,7 @@
            </select>
            <button class="results-save" name="t" value="text">Apply settings</button>
        </form>
        <div class="results">
        <div class="results" id="results">
            {{if .Results}}
            {{range .Results}}
            <div class="result_item">
@@ -70,7 +70,7 @@
            <div class="no-results">No results found for '{{ .Query }}'. Try different keywords.</div>
            {{end}}
        </div>
        <div class="prev-next prev-img">
        <div class="prev-next prev-img" id="prev-next">
            <form action="/search" method="get">
                <input type="hidden" name="q" value="{{ .Query }}">
                <input type="hidden" name="t" value="text">
@@ -83,8 +83,35 @@
            </form>
        </div>
    <script>
        // Check if JavaScript is enabled and modify the DOM accordingly
        document.getElementById('content').classList.remove('js-enabled');

        document.addEventListener("DOMContentLoaded", function() {
            if (document.getElementById('prev-next')) {
                document.getElementById('prev-next').style.display = 'none';

                let page = {{ .Page }};
                const query = "{{ .Query }}";

                function loadResults(newPage) {
                    fetch(`/search?q=${encodeURIComponent(query)}&t=text&p=${newPage}`)
                        .then(response => response.text())
                        .then(data => {
                            const parser = new DOMParser();
                            const doc = parser.parseFromString(data, 'text/html');
                            const newResults = doc.getElementById('results').innerHTML;
                            document.getElementById('results').innerHTML += newResults;
                            page = newPage;
                        })
                        .catch(error => console.error('Error loading results:', error));
                }

                window.addEventListener('scroll', () => {
                    if (window.innerHeight + window.scrollY >= document.body.offsetHeight) {
                        loadResults(page + 1);
                    }
                });
            }
        });
    </script>
</body>
</html>

@@ -1,3 +1,4 @@
// text-duckduckgo.go
package main

import (
@@ -6,66 +7,15 @@ import (
    "net/http"
    "net/url"
    "strings"
    "time"

    "github.com/PuerkitoBio/goquery"
)

const (
    resultsPerPage = 10
)

func getVQD(query string) (string, error) {
    queryURL := fmt.Sprintf("https://duckduckgo.com/?q=%s", url.QueryEscape(query))
    resp, err := http.Get(queryURL)
    if err != nil {
        return "", fmt.Errorf("failed to fetch vqd: %v", err)
    }
    defer resp.Body.Close()

    doc, err := goquery.NewDocumentFromReader(resp.Body)
    if err != nil {
        return "", fmt.Errorf("loading HTML document: %v", err)
    }

    var vqd string
    doc.Find("script").Each(func(i int, s *goquery.Selection) {
        text := s.Text()
        if strings.Contains(text, "vqd=\"") {
            start := strings.Index(text, "vqd=\"") + 5
            end := strings.Index(text[start:], "\"")
            vqd = text[start : start+end]
        }
    })

    if vqd == "" {
        return "", fmt.Errorf("vqd not found")
    }

    return vqd, nil
}

func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
    var results []TextSearchResult
    searchURL := buildDuckDuckGoSearchURL(query, page)

    client := &http.Client{Timeout: 10 * time.Second}

    vqd, err := getVQD(query)
    if err != nil {
        return nil, fmt.Errorf("failed to get vqd: %v", err)
    }

    searchURL := fmt.Sprintf("https://duckduckgo.com/html/?q=%s&kl=%s&safe=%s&s=%d&vqd=%s",
        url.QueryEscape(query), lang, safe, (page-1)*resultsPerPage, vqd)

    req, err := http.NewRequest("GET", searchURL, nil)
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %v", err)
    }

    req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36")

    resp, err := client.Do(req)
    resp, err := http.Get(searchURL)
    if err != nil {
        return nil, fmt.Errorf("making request: %v", err)
    }

@@ -94,34 +44,23 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear
                    URL:         uddg,
                    Header:      strings.TrimSpace(header),
                    Description: strings.TrimSpace(description),
                    Source:      "DuckDuckGo",
                }
                results = append(results, result)
                if debugMode {
                    log.Printf("Processed DuckDuckGo result: %+v\n", result)
                }
            } else {
                if debugMode {
                    log.Printf("Missing 'uddg' parameter in URL: %s\n", rawURL)
                }
            }
        } else {
            if debugMode {
                log.Printf("Error parsing URL: %s, error: %v\n", rawURL, err)
            }
        }
        } else {
            if debugMode {
                log.Printf("Missing 'href' attribute in result anchor tag\n")
            }
        }
    })

    if len(results) == 0 {
        if debugMode {
            log.Println("No results found from DuckDuckGo")
        }
    }

    return results, nil
}

func buildDuckDuckGoSearchURL(query string, page int) string {
    startParam := ""
    if page > 1 {
        startParam = fmt.Sprintf("&s=%d", (page-1)*10)
    }
    return fmt.Sprintf("https://duckduckgo.com/html/?q=%s%s", url.QueryEscape(query), startParam)
}

@@ -1,62 +1,47 @@
package main

import (
    "context"
    "fmt"
    "log"
    "net/http"
    "net/url"
    "strings"
    "time"

    "github.com/PuerkitoBio/goquery"
    "github.com/chromedp/chromedp"
)

func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
    const resultsPerPage = 10
    opts := append(chromedp.DefaultExecAllocatorOptions[:],
        chromedp.DisableGPU,
        chromedp.NoDefaultBrowserCheck,
        chromedp.NoFirstRun,
        chromedp.Flag("disable-javascript", true),
    )
    ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
    defer cancel()

    ctx, cancel = chromedp.NewContext(ctx)
    defer cancel()

    var results []TextSearchResult

    client := &http.Client{}
    searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)

    req, err := http.NewRequest("GET", searchURL, nil)
    searchURL := buildSearchURL(query, safe, lang, page, 10)
    var pageSource string
    err := chromedp.Run(ctx,
        chromedp.Navigate(searchURL),
        chromedp.Sleep(2*time.Second),
        chromedp.OuterHTML("html", &pageSource),
    )
    if err != nil {
        return nil, fmt.Errorf("failed to create request: %v", err)
        return nil, fmt.Errorf("failed to retrieve page source: %v", err)
    }

    // User Agent generation
    TextUserAgent, err := GetUserAgent("Text-Search")
    newResults, err := parseResults(pageSource)
    if err != nil {
        fmt.Println("Error:", err)
        return nil, err
    }

    if debugMode {
        fmt.Println("Generated User Agent (text):", TextUserAgent)
    }

    req.Header.Set("User-Agent", TextUserAgent)

    resp, err := client.Do(req)
    if err != nil {
        return nil, fmt.Errorf("making request: %v", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
    }

    doc, err := goquery.NewDocumentFromReader(resp.Body)
    if err != nil {
        return nil, fmt.Errorf("loading HTML document: %v", err)
    }

    results = parseResults(doc)

    if len(results) == 0 {
        if debugMode {
            log.Println("No results found from Google")
        }
        return nil, fmt.Errorf("error parsing results: %v", err)
    }
    results = append(results, newResults...)

    return results, nil
}

@@ -72,20 +57,23 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
        langParam = "&lr=" + lang
    }

    startIndex := (page - 1) * resultsPerPage
    return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&udm=14&start=%d", url.QueryEscape(query), safeParam, langParam, startIndex)
    startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage)

    return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam)
}

func parseResults(doc *goquery.Document) []TextSearchResult {
func parseResults(pageSource string) ([]TextSearchResult, error) {
    var results []TextSearchResult

    doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
    if err != nil {
        return nil, fmt.Errorf("loading HTML document: %v", err)
    }

    doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
        link := s.Find("a")
        href, exists := link.Attr("href")
        if !exists {
            if debugMode {
                log.Printf("No href attribute found for result %d\n", i)
            }
            return
        }

@@ -104,10 +92,7 @@ func parseResults(doc *goquery.Document) []TextSearchResult {
            Description: description,
        }
        results = append(results, result)
        if debugMode {
            log.Printf("Google result: %+v\n", result)
        }
    })

    return results
    return results, nil
}

@@ -20,7 +20,7 @@ type LibreXResponse []LibreXResult

func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
    // LibreX uses page starting from 0
    searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page-1)
    searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page)

    // User Agent generation
    userAgent, err := GetUserAgent("librex-text-search")
@@ -63,10 +63,6 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
            Source:      "LibreX",
        }

        if debugMode {
            log.Printf("LibreX result: %+v\n", result)
        }

        results = append(results, result)
    }

@@ -3,7 +3,6 @@ package main
import (
    "encoding/json"
    "fmt"
    "log"
    "net/http"
    "net/url"
    "time"
@@ -27,11 +26,9 @@ type QwantTextAPIResponse struct {
}

// PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult
func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error) {
    const resultsPerPage = 10

    // Calculate the offset based on the page number
    offset := (page - 1) * resultsPerPage
    const offset = 0

    // Ensure safe search is disabled by default if not specified
    if safe == "" {
@@ -43,12 +40,11 @@ func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchRes
        lang = "en_CA"
    }

    apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop&safesearch=%s",
    apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop",
        url.QueryEscape(query),
        resultsPerPage,
        lang,
        offset,
        safe)
        offset)

    client := &http.Client{Timeout: 10 * time.Second}

@@ -97,9 +93,6 @@ func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchRes
func cleanQwantURL(rawURL string) string {
    u, err := url.Parse(rawURL)
    if err != nil {
        if debugMode {
            log.Printf("Error parsing URL: %v", err)
        }
        return rawURL
    }
    return u.Scheme + "://" + u.Host + u.Path

170 text.go
@@ -1,44 +1,56 @@
package main

import (
    "flag"
    "fmt"
    "html/template"
    "log"
    "math/rand"
    "net/http"
    "sort"
    "sync"
    "time"
)

var (
    debugMode        bool
    searchEngines    []searchEngine
    searchEngineLock sync.Mutex
)

type searchEngine struct {
    Name   string
    Func   func(string, string, string, int) ([]TextSearchResult, error)
    Weight int
}

func init() {
    flag.BoolVar(&debugMode, "debug", false, "enable debug mode")
    flag.Parse()
    searchEngines = []searchEngine{
        {Name: "Google", Func: PerformGoogleTextSearch, Weight: 1},
        {Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2},
        // {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash
        // {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented
    }

    rand.Seed(time.Now().UnixNano())
}

func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) {
    startTime := time.Now()
    const resultsPerPage = 10

    cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
    combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page, resultsPerPage)
    combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page)

    hasPrevPage := page > 1
    hasNextPage := len(combinedResults) == resultsPerPage
    hasNextPage := len(combinedResults) > 0

    displayResults(w, combinedResults, query, lang, time.Since(startTime).Seconds(), page, hasPrevPage, hasNextPage)

    // Always check and cache the next page if not enough results
    if hasNextPage {
        go cacheNextPageIfNotCached(query, safe, lang, page+1, resultsPerPage)
    // Prefetch next and previous pages
    go prefetchPage(query, safe, lang, page+1)
    if hasPrevPage {
        go prefetchPage(query, safe, lang, page-1)
    }
}

func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page, resultsPerPage int) []TextSearchResult {
func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page int) []TextSearchResult {
    cacheChan := make(chan []SearchResult)
    var combinedResults []TextSearchResult

@@ -56,7 +68,7 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
    select {
    case results := <-cacheChan:
        if results == nil {
            combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
            combinedResults = fetchTextResults(query, safe, lang, page)
            resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
        } else {
            textResults, _, _ := convertToSpecificResults(results)
@@ -64,129 +76,63 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string,
        }
    case <-time.After(2 * time.Second):
        log.Println("Cache check timeout")
        combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
        combinedResults = fetchTextResults(query, safe, lang, page)
        resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
    }

    return combinedResults
}

func cacheNextPageIfNotCached(query, safe, lang string, page, resultsPerPage int) {
func prefetchPage(query, safe, lang string, page int) {
    cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
    if _, exists := resultsCache.Get(cacheKey); !exists {
        log.Printf("Next page %d not cached, caching now...", page)
        nextPageResults := fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
        resultsCache.Set(cacheKey, convertToSearchResults(nextPageResults))
        log.Printf("Page %d not cached, caching now...", page)
        pageResults := fetchTextResults(query, safe, lang, page)
        resultsCache.Set(cacheKey, convertToSearchResults(pageResults))
    } else {
        log.Printf("Next page %d already cached", page)
        log.Printf("Page %d already cached", page)
    }
}

func fetchTextResultsUntilFull(query, safe, lang string, targetPage, resultsPerPage int) []TextSearchResult {
    var combinedResults []TextSearchResult
    currentPage := 1
    resultsNeeded := targetPage * resultsPerPage
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
    engine := selectSearchEngine()
    log.Printf("Using search engine: %s", engine.Name)

    for len(combinedResults) < resultsNeeded {
        cacheKey := CacheKey{Query: query, Page: targetPage, Safe: safe == "true", Lang: lang, Type: "text"}
        cachedResults, exists := resultsCache.Get(cacheKey)
        if exists {
            textResults, _, _ := convertToSpecificResults(cachedResults)
            combinedResults = append(combinedResults, textResults...)
        } else {
            results := fetchAndCacheTextResults(query, safe, lang, currentPage, resultsPerPage)
            if len(results) == 0 {
                break
            }
            combinedResults = append(combinedResults, results...)
            resultsCache.Set(cacheKey, convertToSearchResults(results))
    results, err := engine.Func(query, safe, lang, page)
    if err != nil {
        log.Printf("Error performing search with %s: %v", engine.Name, err)
        return nil
        }

        currentPage++

        // Stop fetching if we have enough results for the target page and the next page
        if len(combinedResults) >= resultsNeeded+resultsPerPage {
            break
        }
    }

    startIndex := (targetPage - 1) * resultsPerPage
    endIndex := startIndex + resultsPerPage

    if startIndex >= len(combinedResults) {
        return []TextSearchResult{}
    }
    if endIndex > len(combinedResults) {
        endIndex = len(combinedResults)
    }

    return combinedResults[startIndex:endIndex]
    return results
}

func fetchAndCacheTextResults(query, safe, lang string, page, resultsPerPage int) []TextSearchResult {
    var combinedResults []TextSearchResult
    var wg sync.WaitGroup
    var mu sync.Mutex
func selectSearchEngine() searchEngine {
    searchEngineLock.Lock()
    defer searchEngineLock.Unlock()

    resultsChan := make(chan []TextSearchResult)

    searchFuncs := []struct {
        Func   func(string, string, string, int) ([]TextSearchResult, error)
        Source string
    }{
        {PerformGoogleTextSearch, "Google"},
        {PerformLibreXTextSearch, "LibreX"},
        // {PerformSearXNGTextSearch, "SearXNG"},
    totalWeight := 0
    for _, engine := range searchEngines {
        totalWeight += engine.Weight
    }

    wg.Add(len(searchFuncs))

    for _, searchFunc := range searchFuncs {
        go func(searchFunc func(string, string, string, int) ([]TextSearchResult, error), source string) {
            defer wg.Done()
            results, err := searchFunc(query, safe, lang, page)
            if err == nil {
                for i := range results {
                    results[i].Source = source
                }
                resultsChan <- results
    randValue := rand.Intn(totalWeight)
    for _, engine := range searchEngines {
        if randValue < engine.Weight {
            // Adjust weights for load balancing
            for i := range searchEngines {
                if searchEngines[i].Name == engine.Name {
                    searchEngines[i].Weight = max(1, searchEngines[i].Weight-1)
                } else {
                log.Printf("Error performing search from %s: %v", source, err)
                    searchEngines[i].Weight++
                }
            }(searchFunc.Func, searchFunc.Source)
        }
            return engine
        }
        randValue -= engine.Weight
    }

    go func() {
        wg.Wait()
        close(resultsChan)
    }()

    for results := range resultsChan {
        mu.Lock()
        combinedResults = append(combinedResults, results...)
        mu.Unlock()
    }

    sort.SliceStable(combinedResults, func(i, j int) bool {
        return sourceOrder(combinedResults[i].Source) < sourceOrder(combinedResults[j].Source)
    })

    log.Printf("Fetched %d results for page %d", len(combinedResults), page)

    return combinedResults
}

func sourceOrder(source string) int {
    switch source {
    case "Google":
        return 1
    case "LibreX":
        return 2
    case "SearchXNG":
        return 3
    default:
        return 4
    }
    return searchEngines[0] // fallback to the first engine
}

func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {