Search/text-google.go
2024-06-09 12:43:46 +02:00

116 lines
2.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"context"
"fmt"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/chromedp/chromedp"
)
// type TextSearchResult struct {
// URL string
// Header string
// Description string
// }
// func main() {
// // Example usage
// results, err := PerformGoogleTextSearch("golang", "off", "lang_en", 2)
// if err != nil {
// log.Fatalf("Error performing search: %v", err)
// }
// for _, result := range results {
// fmt.Printf("URL: %s\nHeader: %s\nDescription: %s\n", result.URL, result.Header, result.Description)
// }
// }
// PerformGoogleTextSearch runs a Google text search for query in a chromedp
// browser context and returns the results parsed from the rendered page.
//
// safe and lang are forwarded to buildSearchURL. The search page is loaded
// once. Then, numPages times, the function waits two seconds, captures the
// page's outer HTML, scrolls to the bottom of the document (presumably so
// that further results load before the next capture) and parses the captured
// HTML with parseResults.
//
// Every capture contains the whole document, so results collected in an
// earlier iteration would otherwise be appended again. Results are therefore
// de-duplicated by URL; the first occurrence wins and order is preserved.
//
// An error is returned if navigation, page capture or parsing fails; the
// underlying error is wrapped and can be inspected with errors.Is/errors.As.
func PerformGoogleTextSearch(query, safe, lang string, numPages int) ([]TextSearchResult, error) {
	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	searchURL := buildSearchURL(query, safe, lang, 1, 10)
	if err := chromedp.Run(ctx, chromedp.Navigate(searchURL)); err != nil {
		return nil, fmt.Errorf("failed to navigate to search URL: %w", err)
	}

	var results []TextSearchResult
	// seen tracks URLs already collected, because each capture below
	// re-reads the full page, including results from earlier iterations.
	seen := make(map[string]struct{})

	for page := 1; page <= numPages; page++ {
		var pageSource string
		err := chromedp.Run(ctx,
			chromedp.Sleep(2*time.Second),
			chromedp.OuterHTML("html", &pageSource),
			chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
		)
		if err != nil {
			return nil, fmt.Errorf("failed to retrieve page source: %w", err)
		}

		newResults, err := parseResults(pageSource)
		if err != nil {
			return nil, fmt.Errorf("error parsing results: %w", err)
		}

		for _, r := range newResults {
			if _, dup := seen[r.URL]; dup {
				continue
			}
			seen[r.URL] = struct{}{}
			results = append(results, r)
		}
	}

	return results, nil
}
// buildSearchURL assembles the Google search URL for query.
//
// SafeSearch is requested only when safe equals "active"; any other value
// yields "safe=off". A non-empty lang is sent, query-escaped, as the "lr"
// parameter. page is 1-based: for page > 1 with a positive resultsPerPage a
// "start" offset of (page-1)*resultsPerPage is appended so the URL addresses
// that page. Page 1 (or a non-positive page/resultsPerPage) adds no "start"
// parameter.
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
	safeParam := "&safe=off"
	if safe == "active" {
		safeParam = "&safe=active"
	}

	langParam := ""
	if lang != "" {
		// Escape lang so characters such as '&' or '=' cannot inject
		// additional query parameters.
		langParam = "&lr=" + url.QueryEscape(lang)
	}

	startParam := ""
	if page > 1 && resultsPerPage > 0 {
		startParam = fmt.Sprintf("&start=%d", (page-1)*resultsPerPage)
	}

	return "https://www.google.com/search?q=" + url.QueryEscape(query) + safeParam + langParam + startParam
}
// parseResults extracts search results from the HTML document in pageSource.
//
// Each element matching ".yuRUbf" that contains an anchor with an href
// becomes one TextSearchResult: URL is the href, Header is the trimmed text
// of the anchor's <h3>, and Description is the text of the ".VwiC3b" element
// at the same index in the document (empty when there is no such element).
// Elements without an href are skipped.
//
// An error is returned only when the HTML cannot be loaded; the underlying
// error is wrapped.
func parseResults(pageSource string) ([]TextSearchResult, error) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(pageSource))
	if err != nil {
		return nil, fmt.Errorf("loading HTML document: %w", err)
	}

	// Select the description nodes once rather than re-querying the whole
	// document for every result.
	descriptions := doc.Find(".VwiC3b")

	var results []TextSearchResult
	doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
		link := s.Find("a")
		href, exists := link.Attr("href")
		if !exists {
			return
		}

		header := link.Find("h3").Text()
		// NOTE(review): the suffix literal reads as empty here, which makes
		// TrimSuffix a no-op. The hosting page warns about ambiguous Unicode
		// characters in this file, so confirm against the raw source whether
		// a character was meant to be stripped.
		header = strings.TrimSpace(strings.TrimSuffix(header, ""))

		// Descriptions are paired with results purely by position.
		description := ""
		if descSelection := descriptions.Eq(i); descSelection.Length() > 0 {
			description = descSelection.Text()
		}

		results = append(results, TextSearchResult{
			URL:         href,
			Header:      header,
			Description: description,
		})
	})

	return results, nil
}