Search/text-google.go
2024-06-09 21:44:49 +02:00

98 lines
2.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"context"
"fmt"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/chromedp"
)
// PerformGoogleTextSearch loads one page of Google web-search results in a
// Chrome instance driven by chromedp and returns the parsed entries.
//
// query is the search text; safe, lang and page are forwarded to
// buildSearchURL together with a fixed page size of 10. The HTML of the
// loaded page is then handed to parseResults.
//
// An error is returned when the browser run fails or exceeds the session
// timeout, or when the page source cannot be parsed. The underlying error is
// wrapped with %w so callers can inspect it with errors.Is / errors.As.
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
	// Upper bound for navigation plus HTML extraction. Without a deadline a
	// stalled page load would block the caller and keep the browser process
	// alive indefinitely.
	const searchTimeout = 30 * time.Second

	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.DisableGPU,
		chromedp.NoDefaultBrowserCheck,
		chromedp.NoFirstRun,
		// NOTE(review): passes --disable-javascript to the browser; confirm
		// the flag has the intended effect on the Chrome build in use.
		chromedp.Flag("disable-javascript", true),
	)

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
	defer cancelAlloc()

	browserCtx, cancelBrowser := chromedp.NewContext(allocCtx)
	defer cancelBrowser()

	runCtx, cancelRun := context.WithTimeout(browserCtx, searchTimeout)
	defer cancelRun()

	searchURL := buildSearchURL(query, safe, lang, page, 10)

	var pageSource string
	err := chromedp.Run(runCtx,
		chromedp.Navigate(searchURL),
		// Fixed pause before the document is captured.
		chromedp.Sleep(2*time.Second),
		chromedp.OuterHTML("html", &pageSource),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve page source: %w", err)
	}

	results, err := parseResults(pageSource)
	if err != nil {
		return nil, fmt.Errorf("error parsing results: %w", err)
	}
	return results, nil
}
// buildSearchURL assembles the Google search URL for one page of results.
//
// query is percent-encoded into the q parameter. safe selects SafeSearch:
// only the exact value "active" yields safe=active, anything else yields
// safe=off. lang, when non-empty, is percent-encoded into the lr parameter.
// page is treated as 1-based: the start offset is (page-1)*resultsPerPage,
// clamped to zero so that page values below 1 never produce a negative
// offset.
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
	safeParam := "&safe=off"
	if safe == "active" {
		safeParam = "&safe=active"
	}

	langParam := ""
	if lang != "" {
		// Escape lang so characters such as '&' or '=' cannot inject
		// additional query parameters.
		langParam = "&lr=" + url.QueryEscape(lang)
	}

	start := (page - 1) * resultsPerPage
	if start < 0 {
		start = 0
	}

	return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&start=%d",
		url.QueryEscape(query), safeParam, langParam, start)
}
// parseResults extracts search results from the HTML of a results page.
//
// Every element with class "yuRUbf" is treated as one result. The href of
// the first anchor found inside it becomes the URL; elements whose anchor
// has no href are skipped. The whitespace-trimmed text of the <h3> elements
// under those anchors becomes the Header. The Description is the text of
// the element with class "VwiC3b" at the same document-wide index as the
// "yuRUbf" element, or empty when no element exists at that index.
//
// It returns a nil slice when nothing matches, and an error only when the
// HTML cannot be loaded into a goquery document.
func parseResults(pageSource string) ([]TextSearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
	if err != nil {
		return nil, fmt.Errorf("loading HTML document: %w", err)
	}

	// Select all description nodes once instead of re-querying the whole
	// document for every result.
	descriptions := doc.Find(".VwiC3b")

	var results []TextSearchResult
	doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
		link := s.Find("a")
		href, exists := link.Attr("href")
		if !exists {
			return
		}

		header := link.Find("h3").Text()
		// NOTE(review): trimming an empty suffix is a no-op as written; the
		// literal may have lost a character — confirm the intended suffix.
		header = strings.TrimSpace(strings.TrimSuffix(header, ""))

		// Descriptions are paired with results purely by position.
		description := ""
		if desc := descriptions.Eq(i); desc.Length() > 0 {
			description = desc.Text()
		}

		results = append(results, TextSearchResult{
			URL:         href,
			Header:      header,
			Description: description,
		})
	})
	return results, nil
}