diff --git a/text-google.go b/text-google.go index 971c407..9c338cc 100644 --- a/text-google.go +++ b/text-google.go @@ -1,47 +1,62 @@ package main import ( - "context" "fmt" + "log" + "net/http" "net/url" "strings" - "time" "github.com/PuerkitoBio/goquery" - "github.com/chromedp/chromedp" ) func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { - opts := append(chromedp.DefaultExecAllocatorOptions[:], - chromedp.DisableGPU, - chromedp.NoDefaultBrowserCheck, - chromedp.NoFirstRun, - chromedp.Flag("disable-javascript", true), - ) - ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) - defer cancel() - - ctx, cancel = chromedp.NewContext(ctx) - defer cancel() - + const resultsPerPage = 10 var results []TextSearchResult - searchURL := buildSearchURL(query, safe, lang, page, 10) - var pageSource string - err := chromedp.Run(ctx, - chromedp.Navigate(searchURL), - chromedp.Sleep(2*time.Second), - chromedp.OuterHTML("html", &pageSource), - ) + client := &http.Client{} + searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) + + req, err := http.NewRequest("GET", searchURL, nil) if err != nil { - return nil, fmt.Errorf("failed to retrieve page source: %v", err) + return nil, fmt.Errorf("failed to create request: %v", err) } - newResults, err := parseResults(pageSource) + // User Agent generation + TextUserAgent, err := GetUserAgent("Text-Search") if err != nil { - return nil, fmt.Errorf("error parsing results: %v", err) + fmt.Println("Error:", err) + return nil, err + } + + if debugMode { + fmt.Println("Generated User Agent (text):", TextUserAgent) + } + + req.Header.Set("User-Agent", TextUserAgent) + + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("making request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode) + } + + doc, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, fmt.Errorf("loading HTML document: %v", err) + } + + results = parseResults(doc) + + if len(results) == 0 { + if debugMode { + log.Println("No results found from Google") + } } - results = append(results, newResults...) return results, nil } @@ -57,23 +72,20 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string { langParam = "&lr=" + lang } - startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage) - - return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam) + startIndex := (page - 1) * resultsPerPage + return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&udm=14&start=%d", url.QueryEscape(query), safeParam, langParam, startIndex) } -func parseResults(pageSource string) ([]TextSearchResult, error) { +func parseResults(doc *goquery.Document) []TextSearchResult { var results []TextSearchResult - doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource)) - if err != nil { - return nil, fmt.Errorf("loading HTML document: %v", err) - } - doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) { link := s.Find("a") href, exists := link.Attr("href") if !exists { + if debugMode { + log.Printf("No href attribute found for result %d\n", i) + } return } @@ -92,7 +104,10 @@ func parseResults(pageSource string) ([]TextSearchResult, error) { Description: description, } results = append(results, result) + if debugMode { + log.Printf("Google result: %+v\n", result) + } }) - return results, nil + return results } diff --git a/text-librex.go b/text-librex.go index 526d7e8..15dddd4 100644 --- a/text-librex.go +++ b/text-librex.go @@ -19,7 +19,11 @@ type LibreXResult struct { type LibreXResponse []LibreXResult func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { - // LibreX uses page starting from 0 + + // LibreX/Y uses offset instead of page that starts at 0 + page-- + page = page * 10 + searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page) // User Agent generation