Search/text-google.go

99 lines
2.3 KiB
Go
Raw Normal View History

2024-04-15 06:35:17 +00:00
package main
import (
2024-06-09 10:43:46 +00:00
"context"
"fmt"
2024-04-15 06:35:17 +00:00
"net/url"
"strings"
2024-06-09 10:43:46 +00:00
"time"
2024-04-15 06:35:17 +00:00
"github.com/PuerkitoBio/goquery"
2024-06-09 10:43:46 +00:00
"github.com/chromedp/chromedp"
2024-04-15 06:35:17 +00:00
)
2024-06-09 19:44:49 +00:00
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.NoDefaultBrowserCheck,
chromedp.NoFirstRun,
chromedp.Flag("disable-javascript", true),
)
ctx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
2024-06-09 10:43:46 +00:00
defer cancel()
2024-06-02 10:05:25 +00:00
2024-06-09 19:44:49 +00:00
ctx, cancel = chromedp.NewContext(ctx)
defer cancel()
2024-06-02 10:05:25 +00:00
2024-06-09 19:44:49 +00:00
var results []TextSearchResult
2024-04-15 06:35:17 +00:00
2024-06-09 19:44:49 +00:00
searchURL := buildSearchURL(query, safe, lang, page, 10)
var pageSource string
2024-06-09 10:43:46 +00:00
err := chromedp.Run(ctx,
chromedp.Navigate(searchURL),
2024-06-09 19:44:49 +00:00
chromedp.Sleep(2*time.Second),
chromedp.OuterHTML("html", &pageSource),
2024-06-09 10:43:46 +00:00
)
2024-04-15 06:35:17 +00:00
if err != nil {
2024-06-09 19:44:49 +00:00
return nil, fmt.Errorf("failed to retrieve page source: %v", err)
2024-04-15 06:35:17 +00:00
}
2024-06-09 19:44:49 +00:00
newResults, err := parseResults(pageSource)
if err != nil {
return nil, fmt.Errorf("error parsing results: %v", err)
2024-05-21 19:22:36 +00:00
}
2024-06-09 19:44:49 +00:00
results = append(results, newResults...)
2024-05-21 19:22:36 +00:00
return results, nil
}
// buildSearchURL assembles the Google search URL for one page of results.
//
// query is URL-escaped into the q parameter. safe selects SafeSearch: the
// value "active" yields safe=active, anything else yields safe=off. A
// non-empty lang is URL-escaped and sent as the lr parameter; an empty lang
// omits lr entirely. page is 1-based and resultsPerPage is the page size;
// together they give the start offset, which is clamped to zero so that page
// values below 1 address the first page instead of a negative offset.
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
	safeParam := "&safe=off"
	if safe == "active" {
		safeParam = "&safe=active"
	}

	langParam := ""
	if lang != "" {
		// Escape lang just like query so characters such as '&', '#' or
		// spaces cannot inject or truncate URL parameters.
		langParam = "&lr=" + url.QueryEscape(lang)
	}

	start := (page - 1) * resultsPerPage
	if start < 0 {
		start = 0
	}

	return fmt.Sprintf("https://www.google.com/search?q=%s%s%s&start=%d",
		url.QueryEscape(query), safeParam, langParam, start)
}
2024-06-09 10:43:46 +00:00
func parseResults(pageSource string) ([]TextSearchResult, error) {
2024-05-21 19:22:36 +00:00
var results []TextSearchResult
2024-06-09 10:43:46 +00:00
doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err)
}
2024-04-15 06:35:17 +00:00
doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
link := s.Find("a")
href, exists := link.Attr("href")
if !exists {
return
}
2024-04-15 06:35:17 +00:00
header := link.Find("h3").Text()
header = strings.TrimSpace(strings.TrimSuffix(header, ""))
description := ""
2024-05-21 19:22:36 +00:00
descSelection := doc.Find(".VwiC3b").Eq(i)
2024-04-15 06:35:17 +00:00
if descSelection.Length() > 0 {
description = descSelection.Text()
}
2024-05-17 23:59:29 +00:00
result := TextSearchResult{
2024-04-15 06:35:17 +00:00
URL: href,
Header: header,
Description: description,
2024-05-17 23:59:29 +00:00
}
results = append(results, result)
2024-04-15 06:35:17 +00:00
})
2024-06-09 10:43:46 +00:00
return results, nil
2024-04-15 06:35:17 +00:00
}