Compare commits

..

No commits in common. "70d6db50d3d1d92e19361173a4c56e09a8d7e6ce" and "d0e0d31b67e256b97aab744707bc9eb43a31c671" have entirely different histories.

2 changed files with 37 additions and 56 deletions

View file

@ -1,62 +1,47 @@
package main package main
import ( import (
"context"
"fmt" "fmt"
"log"
"net/http"
"net/url" "net/url"
"strings" "strings"
"time"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/chromedp/chromedp"
) )
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
const resultsPerPage = 10 opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.NoDefaultBrowserCheck,
chromedp.NoFirstRun,
chromedp.Flag("disable-javascript", true),
)
ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
ctx, cancel = chromedp.NewContext(ctx)
defer cancel()
var results []TextSearchResult var results []TextSearchResult
client := &http.Client{} searchURL := buildSearchURL(query, safe, lang, page, 10)
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage) var pageSource string
err := chromedp.Run(ctx,
req, err := http.NewRequest("GET", searchURL, nil) chromedp.Navigate(searchURL),
chromedp.Sleep(2*time.Second),
chromedp.OuterHTML("html", &pageSource),
)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create request: %v", err) return nil, fmt.Errorf("failed to retrieve page source: %v", err)
} }
// User Agent generation newResults, err := parseResults(pageSource)
TextUserAgent, err := GetUserAgent("Text-Search")
if err != nil { if err != nil {
fmt.Println("Error:", err) return nil, fmt.Errorf("error parsing results: %v", err)
return nil, err
}
if debugMode {
fmt.Println("Generated User Agent (text):", TextUserAgent)
}
req.Header.Set("User-Agent", TextUserAgent)
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err)
}
results = parseResults(doc)
if len(results) == 0 {
if debugMode {
log.Println("No results found from Google")
}
} }
results = append(results, newResults...)
return results, nil return results, nil
} }
@ -72,20 +57,23 @@ func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
langParam = "&lr=" + lang langParam = "&lr=" + lang
} }
startIndex := (page - 1) * resultsPerPage startParam := fmt.Sprintf("&start=%d", (page-1)*resultsPerPage)
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&udm=14&start=%d", url.QueryEscape(query), safeParam, langParam, startIndex)
return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s", url.QueryEscape(query), safeParam, langParam, startParam)
} }
func parseResults(doc *goquery.Document) []TextSearchResult { func parseResults(pageSource string) ([]TextSearchResult, error) {
var results []TextSearchResult var results []TextSearchResult
doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err)
}
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) { doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
link := s.Find("a") link := s.Find("a")
href, exists := link.Attr("href") href, exists := link.Attr("href")
if !exists { if !exists {
if debugMode {
log.Printf("No href attribute found for result %d\n", i)
}
return return
} }
@ -104,10 +92,7 @@ func parseResults(doc *goquery.Document) []TextSearchResult {
Description: description, Description: description,
} }
results = append(results, result) results = append(results, result)
if debugMode {
log.Printf("Google result: %+v\n", result)
}
}) })
return results return results, nil
} }

View file

@ -19,11 +19,7 @@ type LibreXResult struct {
type LibreXResponse []LibreXResult type LibreXResponse []LibreXResult
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) { func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
// LibreX uses page starting from 0
// LibreX/Y uses offset instead of page that starts at 0
page--
page = page * 10
searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page) searchURL := fmt.Sprintf("https://%s/api.php?q=%s&p=%d&t=0", LIBREX_DOMAIN, url.QueryEscape(query), page)
// User Agent generation // User Agent generation