From 5614da46482aeea9891e22f0120c837933ca7e61 Mon Sep 17 00:00:00 2001 From: partisan Date: Sun, 9 Jun 2024 12:43:46 +0200 Subject: [PATCH 01/17] wip, revert changes --- get-searchxng.go | 14 +++--- go.mod | 12 +++++- go.sum | 22 ++++++++++ text-duckduckgo.go | 77 ++------------------------------- text-google.go | 105 +++++++++++++++++++++++---------------------- text-quant.go | 15 ++----- text.go | 2 +- 7 files changed, 103 insertions(+), 144 deletions(-) diff --git a/get-searchxng.go b/get-searchxng.go index cb88261..b6da71a 100644 --- a/get-searchxng.go +++ b/get-searchxng.go @@ -128,10 +128,10 @@ func isInstanceValid(instance SearXInstance) bool { } } -func main() { - instance, err := getRandomSearXInstance() - if err != nil { - log.Fatalf("Failed to get a SearX instance: %v", err) - } - fmt.Printf("Selected SearX instance: %s\n", instance.URL) -} +// func main() { +// instance, err := getRandomSearXInstance() +// if err != nil { +// log.Fatalf("Failed to get a SearX instance: %v", err) +// } +// fmt.Printf("Selected SearX instance: %s\n", instance.URL) +// } diff --git a/go.mod b/go.mod index 4f6d6fa..0cca960 100644 --- a/go.mod +++ b/go.mod @@ -2,8 +2,18 @@ module searchengine go 1.18 +require github.com/PuerkitoBio/goquery v1.9.1 // direct + require ( - github.com/PuerkitoBio/goquery v1.9.1 // direct github.com/andybalholm/cascadia v1.3.2 // indirect + github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 // indirect + github.com/chromedp/chromedp v0.9.5 // indirect + github.com/chromedp/sysutil v1.0.0 // indirect + github.com/gobwas/httphead v0.1.0 // indirect + github.com/gobwas/pool v0.2.1 // indirect + github.com/gobwas/ws v1.3.2 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect golang.org/x/net v0.21.0 // indirect + golang.org/x/sys v0.17.0 // indirect ) diff --git a/go.sum b/go.sum index f988942..f919d3c 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,24 @@ github.com/PuerkitoBio/goquery v1.9.1 h1:mTL6XjbJTZdpfL+Gwl5U2h1l9yEkJjhmlTeV9VP github.com/PuerkitoBio/goquery v1.9.1/go.mod h1:cW1n6TmIMDoORQU5IU/P1T3tGFunOeXEpGP2WHRwkbY= github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss= github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU= +github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732 h1:XYUCaZrW8ckGWlCRJKCSoh/iFwlpX316a8yY9IFEzv8= +github.com/chromedp/cdproto v0.0.0-20240202021202-6d0b6a386732/go.mod h1:GKljq0VrfU4D5yc+2qA6OVr8pmO/MBbPEWqWQ/oqGEs= +github.com/chromedp/chromedp v0.9.5 h1:viASzruPJOiThk7c5bueOUY91jGLJVximoEMGoH93rg= +github.com/chromedp/chromedp v0.9.5/go.mod h1:D4I2qONslauw/C7INoCir1BJkSwBYMyZgx8X276z3+Y= +github.com/chromedp/sysutil v1.0.0 h1:+ZxhTpfpZlmchB58ih/LBHX52ky7w2VhQVKQMucy3Ic= +github.com/chromedp/sysutil v1.0.0/go.mod h1:kgWmDdq8fTzXYcKIBqIYvRRTnYb9aNS9moAV0xufSww= +github.com/gobwas/httphead v0.1.0 h1:exrUm0f4YX0L7EBwZHuCF4GDp8aJfVeBrlLQrs6NqWU= +github.com/gobwas/httphead v0.1.0/go.mod h1:O/RXo79gxV8G+RqlR/otEwx4Q36zl9rqC5u12GKvMCM= +github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= +github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= +github.com/gobwas/ws v1.3.2 h1:zlnbNHxumkRvfPWgfXu8RBwyNR1x8wh9cf5PTOCqs9Q= +github.com/gobwas/ws v1.3.2/go.mod h1:hRKAFb8wOxFROYNsT1bqfWnhX+b5MFeJM9r2ZSwg/KY= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/ledongthuc/pdf v0.0.0-20220302134840-0c2507a12d80/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= @@ -23,7 +41,11 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= diff --git a/text-duckduckgo.go b/text-duckduckgo.go index 881256b..d003895 100644 --- a/text-duckduckgo.go +++ b/text-duckduckgo.go @@ -1,3 +1,4 @@ +// text-duckduckgo.go package main import ( @@ -6,66 +7,15 @@ import ( "net/http" "net/url" "strings" - "time" "github.com/PuerkitoBio/goquery" ) -const ( - resultsPerPage = 10 -) - -func getVQD(query string) (string, error) { - queryURL := fmt.Sprintf("https://duckduckgo.com/?q=%s", url.QueryEscape(query)) - resp, err := http.Get(queryURL) - if err != nil { - return "", fmt.Errorf("failed to fetch vqd: %v", err) - } - defer resp.Body.Close() - - doc, err := goquery.NewDocumentFromReader(resp.Body) - if err != nil { - return "", fmt.Errorf("loading HTML document: %v", err) - } - - var vqd string - doc.Find("script").Each(func(i int, s *goquery.Selection) { - text := s.Text() - if strings.Contains(text, "vqd=\"") { - start := strings.Index(text, "vqd=\"") + 5 - end := strings.Index(text[start:], "\"") - vqd = text[start : start+end] - } - }) - - if vqd == "" { - return "", fmt.Errorf("vqd not found") - } - - return vqd, nil -} - -func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { +func PerformDuckDuckGoTextSearch(query, safe, lang string) ([]TextSearchResult, error) { var results []TextSearchResult + searchURL := fmt.Sprintf("https://duckduckgo.com/html/?q=%s", url.QueryEscape(query)) - client := &http.Client{Timeout: 10 * time.Second} - - vqd, err := getVQD(query) - if err != nil { - return nil, fmt.Errorf("failed to get vqd: %v", err) - } - - searchURL := fmt.Sprintf("https://duckduckgo.com/html/?q=%s&kl=%s&safe=%s&s=%d&vqd=%s", - url.QueryEscape(query), lang, safe, (page-1)*resultsPerPage, vqd) - - req, err := http.NewRequest("GET", searchURL, nil) - if err != nil { - return nil, fmt.Errorf("failed to create request: %v", err) - } - - req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36") - - resp, err := client.Do(req) + resp, err := http.Get(searchURL) if err != nil { return nil, fmt.Errorf("making request: %v", err) } @@ -94,34 +44,15 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear URL: uddg, Header: strings.TrimSpace(header), Description: strings.TrimSpace(description), - Source: "DuckDuckGo", } results = append(results, result) if debugMode { log.Printf("Processed DuckDuckGo result: %+v\n", result) } - } else { - if debugMode { - log.Printf("Missing 'uddg' parameter in URL: %s\n", rawURL) - } } - } else { - if debugMode { - log.Printf("Error parsing URL: %s, error: %v\n", rawURL, err) - } - } - } else { - if debugMode { - log.Printf("Missing 'href' attribute in result anchor tag\n") } } }) - if len(results) == 0 { - if debugMode { - log.Println("No results found from DuckDuckGo") - } - } - return results, nil } diff --git a/text-google.go b/text-google.go index 9c338cc..c69c5ba 100644 --- a/text-google.go +++ b/text-google.go @@ -1,61 +1,66 @@ package main import ( + "context" "fmt" - "log" - "net/http" "net/url" "strings" + "time" "github.com/PuerkitoBio/goquery" + "github.com/chromedp/chromedp" ) -func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { - const resultsPerPage = 10 +// type TextSearchResult struct { +// URL string +// Header string +// Description string +// } + +// func main() { +// // Example usage +// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2) +// if err != nil { +// log.Fatalf("Error performing search: %v", err) +// } + +// for _, result := range results { +// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description) +// } +// } + +func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) { + ctx, cancel := chromedp.NewContext(context.Background()) + defer cancel() + var results []TextSearchResult - client := &http.Client{} - searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) + searchURL := buildSearchURL(query, safe, lang, 1, 10) + + err := chromedp.Run(ctx, + chromedp.Navigate(searchURL), + ) - req, err := http.NewRequest("GET", searchURL, nil) if err != nil { - return nil, fmt.Errorf("failed to create request: %v", err) + return nil, fmt.Errorf("failed to navigate to search URL: %v", err) } - // User Agent generation - TextUserAgent, err := GetUserAgent("Text-Search") - if err != nil { - fmt.Println("Error:", err) - return nil, err - } - - if debugMode { - fmt.Println("Generated User Agent (text):", TextUserAgent) - } - - req.Header.Set("User-Agent", TextUserAgent) - - resp, err := client.Do(req) - if err != nil { - return nil, fmt.Errorf("making request: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) - } - - doc, err := goquery.NewDocumentFromReader(resp.Body) - if err != nil { - return nil, fmt.Errorf("loading HTML document: %v", err) - } - - results = parseResults(doc) - - if len(results) == 0 { - if debugMode { - log.Println("No results found from Google") + for page := 1; page <= numPages; page++ { + var pageSource string + err := chromedp.Run(ctx, + chromedp.Sleep(2*time.Second), + chromedp.OuterHTML("html", &pageSource), + chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil), + ) + if err != nil { + return nil, fmt.Errorf("failed to retrieve page source: %v", err) } + + newResults, err := parseResults(pageSource) + if err != nil { + return nil, fmt.Errorf("error parsing results: %v", err) + } + results = append(results, newResults...) } return results, nil @@ -72,20 +77,21 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string { langParam = "&lr=" + lang } - startIndex := (page - 1) * resultsPerPage - return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&udm=14&start=%d", url.QueryEscape(query), safeParam, langParam, startIndex) + return fmt.Sprintf("https://www.google.com/search?q=%s%s%s", url.QueryEscape(query), safeParam, langParam) } -func parseResults(doc *goquery.Document) []TextSearchResult { +func parseResults(pageSource string) ([]TextSearchResult, error) { var results []TextSearchResult + doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource)) + if err != nil { + return nil, fmt.Errorf("loading HTML document: %v", err) + } + doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) { link := s.Find("a") href, exists := link.Attr("href") if !exists { - if debugMode { - log.Printf("No href attribute found for result %d\n", i) - } return } @@ -104,10 +110,7 @@ func parseResults(doc *goquery.Document) []TextSearchResult { Description: description, } results = append(results, result) - if debugMode { - log.Printf("Google result: %+v\n", result) - } }) - return results + return results, nil } diff --git a/text-quant.go b/text-quant.go index c090ffe..de8b03a 100644 --- a/text-quant.go +++ b/text-quant.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" "fmt" - "log" "net/http" "net/url" "time" @@ -27,11 +26,9 @@ type QwantTextAPIResponse struct { } // PerformQwantTextSearch contacts the Qwant API and returns a slice of TextSearchResult -func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { +func PerformQwantTextSearch(query, safe, lang string) ([]TextSearchResult, error) { const resultsPerPage = 10 - - // Calculate the offset based on the page number - offset := (page - 1) * resultsPerPage + const offset = 0 // Ensure safe search is disabled by default if not specified if safe == "" { @@ -43,12 +40,11 @@ func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchRes lang = "en_CA" } - apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop&safesearch=%s", + apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/web?q=%s&count=%d&locale=%s&offset=%d&device=desktop", url.QueryEscape(query), resultsPerPage, lang, - offset, - safe) + offset) client := &http.Client{Timeout: 10 * time.Second} @@ -97,9 +93,6 @@ func PerformQwantTextSearch(query, safe, lang string, page int) ([]TextSearchRes func cleanQwantURL(rawURL string) string { u, err := url.Parse(rawURL) if err != nil { - if debugMode { - log.Printf("Error parsing URL: %v", err) - } return rawURL } return u.Scheme + "://" + u.Host + u.Path diff --git a/text.go b/text.go index 6845fab..6d7c727 100644 --- a/text.go +++ b/text.go @@ -135,7 +135,7 @@ func fetchAndCacheTextResults(query, safe, lang string, page, resultsPerPage int Source string }{ {PerformGoogleTextSearch, "Google"}, - {PerformLibreXTextSearch, "LibreX"}, + // {PerformLibreXTextSearch, "LibreX"}, // {PerformSearXNGTextSearch, "SearXNG"}, } -- 2.40.1 From dae80c06848e00b66df30c69e4f83030aaf9d615 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 9 Jun 2024 21:44:49 +0200 Subject: [PATCH 02/17] fix for pages --- run.sh | 2 +- text-duckduckgo.go | 12 +++- text-google.go | 62 ++++++---------- text-librex.go | 6 +- text.go | 174 +++++++++++++++++---------------------------- 5 files changed, 99 insertions(+), 157 deletions(-) diff --git a/run.sh b/run.sh index 9b6d4d8..aa4f4f3 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ #!/bin/bash -go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go --debug \ No newline at end of file +go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file diff --git a/text-duckduckgo.go b/text-duckduckgo.go index d003895..56d098f 100644 --- a/text-duckduckgo.go +++ b/text-duckduckgo.go @@ -11,9 +11,9 @@ import ( "github.com/PuerkitoBio/goquery" ) -func PerformDuckDuckGoTextSearch(query, safe, lang string) ([]TextSearchResult, error) { +func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { var results []TextSearchResult - searchURL := fmt.Sprintf("https://duckduckgo.com/html/?q=%s", url.QueryEscape(query)) + searchURL := buildDuckDuckGoSearchURL(query, page) resp, err := http.Get(searchURL) if err != nil { @@ -56,3 +56,11 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string) ([]TextSearchResult, return results, nil } + +func buildDuckDuckGoSearchURL(query string, page int) string { + startParam := "" + if page > 1 { + startParam = fmt.Sprintf("&s=%d", (page-1)*10) + } + return fmt.Sprintf("https://duckduckgo.com/html/?q=%s%s", url.QueryEscape(query), startParam) +} \ No newline at end of file diff --git a/text-google.go b/text-google.go index c69c5ba..971c407 100644 --- a/text-google.go +++ b/text-google.go @@ -11,57 +11,37 @@ import ( "github.com/chromedp/chromedp" ) -// type TextSearchResult struct { -// URL string -// Header string -// Description string -// } +func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { + opts := append(chromedp.DefaultExecAllocatorOptions[:], + chromedp.DisableGPU, + chromedp.NoDefaultBrowserCheck, + chromedp.NoFirstRun, + chromedp.Flag("disable-javascript", true), + ) + ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) + defer cancel() -// func main() { -// // Example usage -// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2) -// if err != nil { -// log.Fatalf("Error performing search: %v", err) -// } - -// for _, result := range results { -// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description) -// } -// } - -func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) { - ctx, cancel := chromedp.NewContext(context.Background()) + ctx, cancel = chromedp.NewContext(ctx) defer cancel() var results []TextSearchResult - searchURL := buildSearchURL(query, safe, lang, 1, 10) - + searchURL := buildSearchURL(query, safe, lang, page, 10) + var pageSource string err := chromedp.Run(ctx, chromedp.Navigate(searchURL), + chromedp.Sleep(2*time.Second), + chromedp.OuterHTML("html", &pageSource), ) - if err != nil { - return nil, fmt.Errorf("failed to navigate to search URL: %v", err) + return nil, fmt.Errorf("failed to retrieve page source: %v", err) } - for page := 1; page <= numPages; page++ { - var pageSource string - err := chromedp.Run(ctx, - chromedp.Sleep(2*time.Second), - chromedp.OuterHTML("html", &pageSource), - chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil), - ) - if err != nil { - return nil, fmt.Errorf("failed to retrieve page source: %v", err) - } - - newResults, err := parseResults(pageSource) - if err != nil { - return nil, fmt.Errorf("error parsing results: %v", err) - } - results = append(results, newResults...) + newResults, err := parseResults(pageSource) + if err != nil { + return nil, fmt.Errorf("error parsing results: %v", err) } + results = append(results, newResults...) return results, nil } @@ -77,7 +57,9 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string { langParam = "&lr=" + lang } - return fmt.Sprintf("https://www.google.com/search?q=%s%s%s", url.QueryEscape(query), safeParam, langParam) + startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage) + + return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam) } func parseResults(pageSource string) ([]TextSearchResult, error) { diff --git a/text-librex.go b/text-librex.go index 450f20d..526d7e8 100644 --- a/text-librex.go +++ b/text-librex.go @@ -20,7 +20,7 @@ type LibreXResponse []LibreXResult func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { // LibreX uses page starting from 0 - searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page-1) + searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page) // User Agent generation userAgent, err := GetUserAgent("librex-text-search") @@ -63,10 +63,6 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe Source: "LibreX", } - if debugMode { - log.Printf("LibreX result: %+v\n", result) - } - results = append(results, result) } diff --git a/text.go b/text.go index 6d7c727..ecf5d73 100644 --- a/text.go +++ b/text.go @@ -1,44 +1,59 @@ package main import ( - "flag" "fmt" "html/template" "log" + "math/rand" "net/http" - "sort" "sync" "time" ) var ( - debugMode bool + debugMode bool + searchEngines []searchEngine + searchEngineLock sync.Mutex ) +type searchEngine struct { + Name string + Func func(string, string, string, int) ([]TextSearchResult, error) + Weight int +} + func init() { - flag.BoolVar(&debugMode, "debug", false, "enable debug mode") - flag.Parse() + debugMode = false + + searchEngines = []searchEngine{ + {Name: "Google", Func: PerformGoogleTextSearch, Weight: 1}, + {Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2}, + // {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash + // {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented + } + + rand.Seed(time.Now().UnixNano()) } func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) { startTime := time.Now() - const resultsPerPage = 10 cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"} - combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page, resultsPerPage) + combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page) hasPrevPage := page > 1 - hasNextPage := len(combinedResults) == resultsPerPage + hasNextPage := len(combinedResults) > 0 displayResults(w, combinedResults, query, lang, time.Since(startTime).Seconds(), page, hasPrevPage, hasNextPage) - // Always check and cache the next page if not enough results - if hasNextPage { - go cacheNextPageIfNotCached(query, safe, lang, page+1, resultsPerPage) + // Prefetch next and previous pages + go prefetchPage(query, safe, lang, page+1) + if hasPrevPage { + go prefetchPage(query, safe, lang, page-1) } } -func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page, resultsPerPage int) []TextSearchResult { +func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page int) []TextSearchResult { cacheChan := make(chan []SearchResult) var combinedResults []TextSearchResult @@ -56,7 +71,7 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, select { case results := <-cacheChan: if results == nil { - combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage) + combinedResults = fetchTextResults(query, safe, lang, page) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } else { textResults, _, _ := convertToSpecificResults(results) @@ -64,129 +79,70 @@ func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, } case <-time.After(2 * time.Second): log.Println("Cache check timeout") - combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage) + combinedResults = fetchTextResults(query, safe, lang, page) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } return combinedResults } -func cacheNextPageIfNotCached(query, safe, lang string, page, resultsPerPage int) { +func prefetchPage(query, safe, lang string, page int) { cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"} if _, exists := resultsCache.Get(cacheKey); !exists { - log.Printf("Next page %d not cached, caching now...", page) - nextPageResults := fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage) - resultsCache.Set(cacheKey, convertToSearchResults(nextPageResults)) + log.Printf("Page %d not cached, caching now...", page) + pageResults := fetchTextResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(pageResults)) } else { - log.Printf("Next page %d already cached", page) + log.Printf("Page %d already cached", page) } } -func fetchTextResultsUntilFull(query, safe, lang string, targetPage, resultsPerPage int) []TextSearchResult { - var combinedResults []TextSearchResult - currentPage := 1 - resultsNeeded := targetPage * resultsPerPage +func fetchTextResults(query, safe, lang string, page int) []TextSearchResult { + engine := selectSearchEngine() + log.Printf("Using search engine: %s", engine.Name) - for len(combinedResults) < resultsNeeded { - cacheKey := CacheKey{Query: query, Page: targetPage, Safe: safe == "true", Lang: lang, Type: "text"} - cachedResults, exists := resultsCache.Get(cacheKey) - if exists { - textResults, _, _ := convertToSpecificResults(cachedResults) - combinedResults = append(combinedResults, textResults...) - } else { - results := fetchAndCacheTextResults(query, safe, lang, currentPage, resultsPerPage) - if len(results) == 0 { - break - } - combinedResults = append(combinedResults, results...) - resultsCache.Set(cacheKey, convertToSearchResults(results)) - } - - currentPage++ - - // Stop fetching if we have enough results for the target page and the next page - if len(combinedResults) >= resultsNeeded+resultsPerPage { - break - } + results, err := engine.Func(query, safe, lang, page) + if err != nil { + log.Printf("Error performing search with %s: %v", engine.Name, err) + return nil } - startIndex := (targetPage - 1) * resultsPerPage - endIndex := startIndex + resultsPerPage - - if startIndex >= len(combinedResults) { - return []TextSearchResult{} - } - if endIndex > len(combinedResults) { - endIndex = len(combinedResults) - } - - return combinedResults[startIndex:endIndex] + return results } -func fetchAndCacheTextResults(query, safe, lang string, page, resultsPerPage int) []TextSearchResult { - var combinedResults []TextSearchResult - var wg sync.WaitGroup - var mu sync.Mutex +func selectSearchEngine() searchEngine { + searchEngineLock.Lock() + defer searchEngineLock.Unlock() - resultsChan := make(chan []TextSearchResult) - - searchFuncs := []struct { - Func func(string, string, string, int) ([]TextSearchResult, error) - Source string - }{ - {PerformGoogleTextSearch, "Google"}, - // {PerformLibreXTextSearch, "LibreX"}, - // {PerformSearXNGTextSearch, "SearXNG"}, + totalWeight := 0 + for _, engine := range searchEngines { + totalWeight += engine.Weight } - wg.Add(len(searchFuncs)) - - for _, searchFunc := range searchFuncs { - go func(searchFunc func(string, string, string, int) ([]TextSearchResult, error), source string) { - defer wg.Done() - results, err := searchFunc(query, safe, lang, page) - if err == nil { - for i := range results { - results[i].Source = source + randValue := rand.Intn(totalWeight) + for _, engine := range searchEngines { + if randValue < engine.Weight { + // Adjust weights for load balancing + for i := range searchEngines { + if searchEngines[i].Name == engine.Name { + searchEngines[i].Weight = max(1, searchEngines[i].Weight-1) + } else { + searchEngines[i].Weight++ } - resultsChan <- results - } else { - log.Printf("Error performing search from %s: %v", source, err) } - }(searchFunc.Func, searchFunc.Source) + return engine + } + randValue -= engine.Weight } - go func() { - wg.Wait() - close(resultsChan) - }() - - for results := range resultsChan { - mu.Lock() - combinedResults = append(combinedResults, results...) - mu.Unlock() - } - - sort.SliceStable(combinedResults, func(i, j int) bool { - return sourceOrder(combinedResults[i].Source) < sourceOrder(combinedResults[j].Source) - }) - - log.Printf("Fetched %d results for page %d", len(combinedResults), page) - - return combinedResults + return searchEngines[0] // fallback to the first engine } -func sourceOrder(source string) int { - switch source { - case "Google": - return 1 - case "LibreX": - return 2 - case "SearchXNG": - return 3 - default: - return 4 +func max(a, b int) int { + if a > b { + return a } + return b } func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) { -- 2.40.1 From d057915e6631a12ba658fcd12bcb1afbb6e1601f Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 9 Jun 2024 21:59:29 +0200 Subject: [PATCH 03/17] dynamic page loading --- templates/text.html | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/templates/text.html b/templates/text.html index eef8e4c..408c595 100644 --- a/templates/text.html +++ b/templates/text.html @@ -56,7 +56,7 @@ -
+
{{if .Results}} {{range .Results}}
@@ -70,7 +70,7 @@
No results found for '{{ .Query }}'. Try different keywords.
{{end}}
-
+
@@ -83,8 +83,35 @@
-- 2.40.1 From 5d0525b1e9bce6eec7bccc83ab645d77e0c41118 Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 10 Jun 2024 11:49:40 +0200 Subject: [PATCH 04/17] added imgur + some cleanup --- common.go | 24 +++++++ go.mod | 1 + go.sum | 2 + images-imgur.go | 143 +++++++++++++++++++++++++++++++++++++++ images-quant.go | 95 ++++++++++++++++++++++++++ images.go | 174 ++++++++++++++++-------------------------------- run.sh | 2 +- text.go | 10 --- 8 files changed, 324 insertions(+), 127 deletions(-) create mode 100644 common.go create mode 100644 images-imgur.go create mode 100644 images-quant.go diff --git a/common.go b/common.go new file mode 100644 index 0000000..d1de775 --- /dev/null +++ b/common.go @@ -0,0 +1,24 @@ +package main + +import ( + "html/template" +) + +var ( + debugMode bool = true + funcs = template.FuncMap{ + "sub": func(a, b int) int { + return a - b + }, + "add": func(a, b int) int { + return a + b + }, + } +) + +func max(a, b int) int { + if a > b { + return a + } + return b +} diff --git a/go.mod b/go.mod index 0cca960..63d4e99 100644 --- a/go.mod +++ b/go.mod @@ -16,4 +16,5 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect golang.org/x/net v0.21.0 // indirect golang.org/x/sys v0.17.0 // indirect + golang.org/x/time v0.5.0 // indirect ) diff --git a/go.sum b/go.sum index f919d3c..77a830d 100644 --- a/go.sum +++ b/go.sum @@ -55,6 +55,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= diff --git a/images-imgur.go b/images-imgur.go new file mode 100644 index 0000000..2e76879 --- /dev/null +++ b/images-imgur.go @@ -0,0 +1,143 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +// PerformImgurImageSearch performs an image search on Imgur and returns the results +func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { + var results []ImageSearchResult + searchURL := buildImgurSearchURL(query, page) + + resp, err := http.Get(searchURL) + if err != nil { + return nil, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("loading HTML document: %v", err) + } + + doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) { + thumbnailSrc, exists := s.Find("a img").Attr("src") + if !exists || len(thumbnailSrc) < 25 { + return + } + imgSrc := strings.Replace(thumbnailSrc, "b.", ".", 1) + + // Ensure the URLs have the correct protocol + if !strings.HasPrefix(thumbnailSrc, "http") { + thumbnailSrc = "https:" + thumbnailSrc + } + if !strings.HasPrefix(imgSrc, "http") { + imgSrc = "https:" + imgSrc + } + + urlPath, exists := s.Find("a").Attr("href") + if !exists { + return + } + + // Scrape the image directly from the Imgur page + imgSrc = scrapeImageFromImgurPage("https://imgur.com" + urlPath) + + // Remove any query parameters from the URL + imgSrc = removeQueryParameters(imgSrc) + + title, _ := s.Find("a img").Attr("alt") + + width, _ := strconv.Atoi(s.Find("a img").AttrOr("width", "0")) + height, _ := strconv.Atoi(s.Find("a img").AttrOr("height", "0")) + + results = append(results, ImageSearchResult{ + Thumbnail: thumbnailSrc, + Title: strings.TrimSpace(title), + Media: imgSrc, + Width: width, + Height: height, + Source: "https://imgur.com" + urlPath, + ThumbProxy: imgSrc, //"/img_proxy?url=" + url.QueryEscape(imgSrc) + }) + }) + + return results, nil +} + +// scrapeImageFromImgurPage scrapes the image source from the Imgur page +func scrapeImageFromImgurPage(pageURL string) string { + resp, err := http.Get(pageURL) + if err != nil { + fmt.Printf("Error fetching page: %v\n", err) + return "" + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + fmt.Printf("Unexpected status code: %d\n", resp.StatusCode) + return "" + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + fmt.Printf("Error loading HTML document: %v\n", err) + return "" + } + + imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content") + if !exists { + fmt.Printf("Image not found on page: %s\n", pageURL) + return "" + } + + // Ensure the URL has the correct protocol + if !strings.HasPrefix(imgSrc, "http") { + imgSrc = "https:" + imgSrc + } + + return imgSrc +} + +// removeQueryParameters removes query parameters from a URL +func removeQueryParameters(rawURL string) string { + parsedURL, err := url.Parse(rawURL) + if err != nil { + fmt.Printf("Error parsing URL: %v\n", err) + return rawURL + } + parsedURL.RawQuery = "" + return parsedURL.String() +} + +func buildImgurSearchURL(query string, page int) string { + baseURL := "https://imgur.com/search/score/all" + params := url.Values{} + params.Add("q", query) + params.Add("qs", "thumbs") + params.Add("p", fmt.Sprintf("%d", page-1)) + return fmt.Sprintf("%s?%s", baseURL, params.Encode()) +} + +// func main() { +// results, err := PerformImgurImageSearch("cats", "true", "en", 1) +// if err != nil { +// fmt.Println("Error:", err) +// return +// } + +// for _, result := range results { +// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n", +// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height) +// } +// } diff --git a/images-quant.go b/images-quant.go new file mode 100644 index 0000000..fa799f8 --- /dev/null +++ b/images-quant.go @@ -0,0 +1,95 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + "time" +) + +// QwantAPIResponse represents the JSON response structure from Qwant API +type QwantAPIResponse struct { + Data struct { + Result struct { + Items []struct { + Media string `json:"media"` + Thumbnail string `json:"thumbnail"` + Title string `json:"title"` + Url string `json:"url"` + Width int `json:"width"` + Height int `json:"height"` + } `json:"items"` + } `json:"result"` + } `json:"data"` +} + +// PerformQwantImageSearch performs an image search on Qwant and returns the results. +func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) { + const resultsPerPage = 50 + var offset int + if page <= 1 { + offset = 0 + } else { + offset = (page - 1) * resultsPerPage + } + + if safe == "" { + safe = "0" + } + + if lang == "" { + lang = "en_CA" + } + + apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s", + url.QueryEscape(query), + resultsPerPage, + lang, + offset, + safe) + + client := &http.Client{Timeout: 10 * time.Second} + + req, err := http.NewRequest("GET", apiURL, nil) + if err != nil { + return nil, fmt.Errorf("creating request: %v", err) + } + + ImageUserAgent, err := GetUserAgent("Image-Search") + if err != nil { + return nil, err + } + + req.Header.Set("User-Agent", ImageUserAgent) + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + var apiResp QwantAPIResponse + if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { + return nil, fmt.Errorf("decoding response: %v", err) + } + + var results []ImageSearchResult + for _, item := range apiResp.Data.Result.Items { + results = append(results, ImageSearchResult{ + Thumbnail: item.Thumbnail, + Title: item.Title, + Media: item.Media, + Source: item.Url, + ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media), + Width: item.Width, + Height: item.Height, + }) + } + + return results, nil +} diff --git a/images.go b/images.go index 16e8581..f235a85 100644 --- a/images.go +++ b/images.go @@ -1,120 +1,35 @@ package main import ( - "encoding/json" "fmt" "html/template" "log" + "math/rand" "net/http" - "net/url" + "sync" "time" ) -// QwantAPIResponse represents the JSON response structure from Qwant API -type QwantAPIResponse struct { - Data struct { - Result struct { - Items []struct { - Media string `json:"media"` - Thumbnail string `json:"thumbnail"` - Title string `json:"title"` - Url string `json:"url"` - Width int `json:"width"` - Height int `json:"height"` - } `json:"items"` - } `json:"result"` - } `json:"data"` +var ( + imageEngines []imageEngine + imageEngineLock sync.Mutex +) + +type imageEngine struct { + Name string + Func func(string, string, string, int) ([]ImageSearchResult, error) + Weight int } -var funcs = template.FuncMap{ - "sub": func(a, b int) int { - return a - b - }, - "add": func(a, b int) int { - return a + b - }, +func init() { + imageEngines = []imageEngine{ + {Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1}, + {Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2}, + } + + rand.Seed(time.Now().UnixNano()) } -// FetchImageResults contacts the image search API and returns a slice of ImageSearchResult -func fetchImageResults(query string, safe, lang string, page int) ([]ImageSearchResult, error) { - const resultsPerPage = 50 - var offset int - if page <= 1 { - offset = 0 - } else { - offset = (page - 1) * resultsPerPage - } - - // Ensuring safe search is disabled by default if not specified - if safe == "" { - safe = "0" - } - - // Defaulting to English Canada locale if not specified - if lang == "" { - lang = "en_CA" - } - - // Format &lang=lang_de is incorrect, implement fix ! - apiURL := fmt.Sprintf("https://api.qwant.com/v3/search/images?t=images&q=%s&count=%d&locale=%s&offset=%d&device=desktop&tgp=2&safesearch=%s", - url.QueryEscape(query), - resultsPerPage, - lang, - offset, - safe) - - client := &http.Client{Timeout: 10 * time.Second} - - req, err := http.NewRequest("GET", apiURL, nil) - if err != nil { - return nil, fmt.Errorf("creating request: %v", err) - } - - // User Agent generation - ImageUserAgent, err := GetUserAgent("Image-Search") - if err != nil { - fmt.Println("Error:", err) - return nil, err - } - - if debugMode { - fmt.Println("Generated User Agent (images):", ImageUserAgent) - } - - req.Header.Set("User-Agent", ImageUserAgent) - - resp, err := client.Do(req) - if err != nil { - return nil, fmt.Errorf("making request: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) - } - - var apiResp QwantAPIResponse - if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil { - return nil, fmt.Errorf("decoding response: %v", err) - } - - var results []ImageSearchResult - for _, item := range apiResp.Data.Result.Items { - results = append(results, ImageSearchResult{ - Thumbnail: item.Thumbnail, // Thumbnail URL - Title: item.Title, // Image title - Media: item.Media, // Direct link to the image - Source: item.Url, - ThumbProxy: "/img_proxy?url=" + url.QueryEscape(item.Media), - Width: item.Width, - Height: item.Height, - }) - } - - return results, nil -} - -// HandleImageSearch is the HTTP handler for image search requests func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) { startTime := time.Now() @@ -174,31 +89,58 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string select { case results := <-cacheChan: if results == nil { - combinedResults = fetchAndCacheImageResults(query, safe, lang, page) + combinedResults = fetchImageResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } else { _, _, imageResults := convertToSpecificResults(results) combinedResults = imageResults } case <-time.After(2 * time.Second): log.Println("Cache check timeout") - combinedResults = fetchAndCacheImageResults(query, safe, lang, page) + combinedResults = fetchImageResults(query, safe, lang, page) + resultsCache.Set(cacheKey, convertToSearchResults(combinedResults)) } return combinedResults } -func fetchAndCacheImageResults(query, safe, lang string, page int) []ImageSearchResult { - results, err := fetchImageResults(query, safe, lang, page) - if err != nil || len(results) == 0 { - log.Printf("Error fetching image results: %v", err) - return []ImageSearchResult{ - {Title: "Results are currently unavailable, sorry. Please try again later."}, - } - } +func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult { + engine := selectImageEngine() + log.Printf("Using image search engine: %s", engine.Name) - // Cache the valid results - cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "image"} - resultsCache.Set(cacheKey, convertToSearchResults(results)) + results, err := engine.Func(query, safe, lang, page) + if err != nil { + log.Printf("Error performing image search with %s: %v", engine.Name, err) + return nil + } return results } + +func selectImageEngine() imageEngine { + imageEngineLock.Lock() + defer imageEngineLock.Unlock() + + totalWeight := 0 + for _, engine := range imageEngines { + totalWeight += engine.Weight + } + + randValue := rand.Intn(totalWeight) + for _, engine := range imageEngines { + if randValue < engine.Weight { + // Adjust weights for load balancing + for i := range imageEngines { + if imageEngines[i].Name == engine.Name { + imageEngines[i].Weight = max(1, imageEngines[i].Weight-1) + } else { + imageEngines[i].Weight++ + } + } + return engine + } + randValue -= engine.Weight + } + + return imageEngines[0] // fallback to the first engine +} diff --git a/run.sh b/run.sh index aa4f4f3..9fa41ee 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ #!/bin/bash -go run main.go images.go imageproxy.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file +go run main.go common.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file diff --git a/text.go b/text.go index ecf5d73..4cc1e0b 100644 --- a/text.go +++ b/text.go @@ -11,7 +11,6 @@ import ( ) var ( - debugMode bool searchEngines []searchEngine searchEngineLock sync.Mutex ) @@ -23,8 +22,6 @@ type searchEngine struct { } func init() { - debugMode = false - searchEngines = []searchEngine{ {Name: "Google", Func: PerformGoogleTextSearch, Weight: 1}, {Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2}, @@ -138,13 +135,6 @@ func selectSearchEngine() searchEngine { return searchEngines[0] // fallback to the first engine } -func max(a, b int) int { - if a > b { - return a - } - return b -} - func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) { log.Printf("Displaying results for page %d", page) log.Printf("Total results: %d", len(results)) -- 2.40.1 From d91c275aed3ba4d3703018e39aeedc9df98ef993 Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 10 Jun 2024 13:12:09 +0200 Subject: [PATCH 05/17] added "next" button for videos --- templates/videos.html | 42 ++++++++++++++++++++++++++---------------- video.go | 11 +++++++++-- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/templates/videos.html b/templates/videos.html index 4289967..c3fa06e 100644 --- a/templates/videos.html +++ b/templates/videos.html @@ -10,10 +10,10 @@

Ocásek

- - - -
+ + + +
@@ -32,17 +32,16 @@
-
- - +
+ + +
-
-

@@ -51,11 +50,11 @@
-
+

{{ .Title }}

{{ .Views }} | {{ .Date }}

YouTube | {{ .Creator }}

@@ -64,12 +63,23 @@
{{ end }} {{ else }} -
No results found for '{{ .Query }}'. Try different keywords.
> +
No results found for '{{ .Query }}'. Try different keywords.
{{ end }} - +
+
+ + + {{ if .HasPrevPage }} + + {{ end }} + {{ if .HasNextPage }} + + {{ end }} +
+
- + \ No newline at end of file diff --git a/video.go b/video.go index ec78793..4aa1ec3 100644 --- a/video.go +++ b/video.go @@ -180,16 +180,23 @@ func handleVideoSearch(w http.ResponseWriter, query, safe, lang string, page int } elapsed := time.Since(start) - tmpl, err := template.ParseFiles("templates/videos.html") + tmpl, err := template.New("videos.html").Funcs(funcs).ParseFiles("templates/videos.html") if err != nil { log.Printf("Error parsing template: %v", err) http.Error(w, "Internal Server Error", http.StatusInternalServerError) return } - tmpl.Execute(w, map[string]interface{}{ + err = tmpl.Execute(w, map[string]interface{}{ "Results": results, "Query": query, "Fetched": fmt.Sprintf("%.2f seconds", elapsed.Seconds()), + "Page": page, + "HasPrevPage": page > 1, + "HasNextPage": len(results) > 0, // assuming you have a way to determine if there are more pages }) + if err != nil { + log.Printf("Error executing template: %v", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + } } -- 2.40.1 From 810f57dd77651afb72d223c9ac862864adcf71ca Mon Sep 17 00:00:00 2001 From: partisan Date: Wed, 12 Jun 2024 14:26:50 +0200 Subject: [PATCH 06/17] opensearch.xml generator --- .gitignore | 2 + init.go | 116 ++++++++++++++++++++++++++++++++++++++++ main.go | 34 +++++++----- open-search.go | 50 +++++++++++++++++ run.sh | 2 +- templates/files.html | 1 + templates/forums.html | 1 + templates/images.html | 1 + templates/map.html | 1 + templates/search.html | 1 + templates/settings.html | 1 + templates/text.html | 1 + templates/videos.html | 1 + 13 files changed, 198 insertions(+), 14 deletions(-) create mode 100644 .gitignore create mode 100644 init.go create mode 100644 open-search.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0034634 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +config.json +opensearch.xml \ No newline at end of file diff --git a/init.go b/init.go new file mode 100644 index 0000000..b3129a4 --- /dev/null +++ b/init.go @@ -0,0 +1,116 @@ +package main + +import ( + "bufio" + "encoding/json" + "fmt" + "log" + "os" + "strconv" +) + +// Configuration structure +type Config struct { + Port int + OpenSearch OpenSearchConfig +} + +type OpenSearchConfig struct { + Domain string +} + +// Default configuration values +var defaultConfig = Config{ + Port: 5000, + OpenSearch: OpenSearchConfig{ + Domain: "localhost", + }, +} + +const configFilePath = "config.json" + +func main() { + // Run the initialization process + err := initConfig() + if err != nil { + fmt.Println("Error during initialization:", err) + return + } + + // Start the main application + runServer() +} + +func initConfig() error { + if _, err := os.Stat(configFilePath); os.IsNotExist(err) { + return createConfig() + } + + fmt.Println("Configuration file already exists.") + return nil +} + +func createConfig() error { + reader := bufio.NewReader(os.Stdin) + + fmt.Println("Configuration file not found.") + fmt.Print("Do you want to use default values? (yes/no): ") + useDefaults, _ := reader.ReadString('\n') + + config := defaultConfig + if useDefaults != "yes\n" { + fmt.Print("Enter port (default 5000): ") + portStr, _ := reader.ReadString('\n') + if portStr != "\n" { + port, err := strconv.Atoi(portStr[:len(portStr)-1]) + if err != nil { + return err + } + config.Port = port + } + + fmt.Print("Enter your domain address (e.g., domain.com): ") + domain, _ := reader.ReadString('\n') + if domain != "\n" { + config.OpenSearch.Domain = domain[:len(domain)-1] + } + } + + saveConfig(config) + return nil +} + +func saveConfig(config Config) { + file, err := os.Create(configFilePath) + if err != nil { + fmt.Println("Error creating config file:", err) + return + } + defer file.Close() + + configData, err := json.MarshalIndent(config, "", " ") + if err != nil { + fmt.Println("Error marshalling config data:", err) + return + } + + _, err = file.Write(configData) + if err != nil { + fmt.Println("Error writing to config file:", err) + } +} + +func loadConfig() Config { + configFile, err := os.Open(configFilePath) + if err != nil { + log.Fatalf("Error opening config file: %v", err) + } + defer configFile.Close() + + var config Config + if err := json.NewDecoder(configFile).Decode(&config); err != nil { + log.Fatalf("Error decoding config file: %v", err) + } + + return config +} diff --git a/main.go b/main.go index a10f225..a463ba6 100644 --- a/main.go +++ b/main.go @@ -63,19 +63,6 @@ var languageOptions = []LanguageOption{ {Code: "lang_vi", Name: "Tiếng Việt (Vietnamese)"}, } -func main() { - http.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("static")))) - http.HandleFunc("/", handleSearch) - http.HandleFunc("/search", handleSearch) - http.HandleFunc("/img_proxy", handleImageProxy) - http.HandleFunc("/settings", func(w http.ResponseWriter, r *http.Request) { - http.ServeFile(w, r, "templates/settings.html") - }) - initializeTorrentSites() - fmt.Println("Server is listening on http://localhost:5000") - log.Fatal(http.ListenAndServe(":5000", nil)) -} - func handleSearch(w http.ResponseWriter, r *http.Request) { query, safe, lang, searchType, page := parseSearchParams(r) @@ -133,3 +120,24 @@ func parsePageParameter(pageStr string) int { } return page } + +func runServer() { + http.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("static")))) + http.HandleFunc("/", handleSearch) + http.HandleFunc("/search", handleSearch) + http.HandleFunc("/img_proxy", handleImageProxy) + http.HandleFunc("/settings", func(w http.ResponseWriter, r *http.Request) { + http.ServeFile(w, r, "templates/settings.html") + }) + http.HandleFunc("/opensearch.xml", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/opensearchdescription+xml") + http.ServeFile(w, r, "static/opensearch.xml") + }) + initializeTorrentSites() + + config := loadConfig() + generateOpenSearchXML(config) + + fmt.Printf("Server is listening on http://localhost:%d\n", config.Port) + log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", config.Port), nil)) +} diff --git a/open-search.go b/open-search.go new file mode 100644 index 0000000..b685e42 --- /dev/null +++ b/open-search.go @@ -0,0 +1,50 @@ +package main + +import ( + "encoding/xml" + "fmt" + "os" +) + +type OpenSearchDescription struct { + XMLName xml.Name `xml:"OpenSearchDescription"` + Xmlns string `xml:"xmlns,attr"` + ShortName string `xml:"ShortName"` + Description string `xml:"Description"` + Tags string `xml:"Tags"` + URL URL `xml:"Url"` +} + +type URL struct { + Type string `xml:"type,attr"` + Template string `xml:"template,attr"` +} + +func generateOpenSearchXML(config Config) { + opensearch := OpenSearchDescription{ + Xmlns: "http://a9.com/-/spec/opensearch/1.1/", + ShortName: "Ocásek", + Description: "Search engine", + Tags: "search, engine", + URL: URL{ + Type: "text/html", + Template: fmt.Sprintf("https://%s/search?q={searchTerms}", config.OpenSearch.Domain), + }, + } + + file, err := os.Create("static/opensearch.xml") + if err != nil { + fmt.Println("Error creating OpenSearch file:", err) + return + } + defer file.Close() + + enc := xml.NewEncoder(file) + enc.Indent(" ", " ") + if err := enc.Encode(opensearch); err != nil { + fmt.Println("Error encoding OpenSearch XML:", err) + return + } + + fmt.Println("OpenSearch description file generated successfully.") +} diff --git a/run.sh b/run.sh index 9fa41ee..a845d9f 100755 --- a/run.sh +++ b/run.sh @@ -1,3 +1,3 @@ #!/bin/bash -go run main.go common.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file +go run main.go common.go init.go open-search.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go \ No newline at end of file diff --git a/templates/files.html b/templates/files.html index d3e8701..67ed60d 100644 --- a/templates/files.html +++ b/templates/files.html @@ -5,6 +5,7 @@ {{.Query}} - Ocásek +
diff --git a/templates/forums.html b/templates/forums.html index 4501a9b..c7752aa 100644 --- a/templates/forums.html +++ b/templates/forums.html @@ -5,6 +5,7 @@ {{.Query}} - Ocásek + diff --git a/templates/images.html b/templates/images.html index a49c217..3ae4a86 100644 --- a/templates/images.html +++ b/templates/images.html @@ -5,6 +5,7 @@ {{.Query}} - Ocásek + diff --git a/templates/map.html b/templates/map.html index 747d568..f698229 100644 --- a/templates/map.html +++ b/templates/map.html @@ -5,6 +5,7 @@ {{ .Query }} - Ocásek +