Search/text-google.go
2024-08-12 17:02:17 +02:00

120 lines
2.8 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"fmt"
"math/rand"
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// PerformGoogleTextSearch fetches one page of Google web search results for
// query and parses it into TextSearchResult values.
//
// safe and lang are forwarded to buildSearchURL; page is the 1-based page
// number, with a fixed page size of 10 results.
//
// On success it returns the parsed results together with the time elapsed
// from just before the request was built until parsing finished. An empty
// result set is not an error; it is only reported through printDebug. On any
// failure (building the request, obtaining a user agent, the HTTP round trip,
// a non-200 status, or HTML parsing) it returns nil results, a zero duration
// and the error.
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
	const (
		resultsPerPage = 10
		// requestTimeout bounds the whole exchange (connect, headers and body)
		// so a stalled connection cannot block the caller indefinitely.
		requestTimeout = 10 * time.Second
	)

	startTime := time.Now()

	client := &http.Client{Timeout: requestTimeout}
	searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)

	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("failed to create request: %w", err)
	}

	// The User-Agent is obtained from GetUserAgent for the "Text-Search" key.
	userAgent, err := GetUserAgent("Text-Search")
	if err != nil {
		return nil, 0, err
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := client.Do(req)
	if err != nil {
		return nil, 0, fmt.Errorf("making request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, 0, fmt.Errorf("loading HTML document: %w", err)
	}

	results := parseResults(doc)
	duration := time.Since(startTime)

	if len(results) == 0 {
		printDebug("No results found from Google Search")
	}

	return results, duration, nil
}
// buildSearchURL assembles the Google search URL for query.
//
// SafeSearch is enabled only when safe equals "active"; any other value
// produces "safe=off". A non-empty lang is sent, URL-escaped, as the "lr"
// parameter. The "gl" and uule fragments come from getRandomGeoLocation.
// page is 1-based and is converted to the zero-based "start" offset using
// resultsPerPage; a computed offset below zero (for example page < 1) is
// clamped to 0 so the URL never carries a negative start.
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
	safeParam := "&safe=off"
	if safe == "active" {
		safeParam = "&safe=active"
	}

	langParam := ""
	if lang != "" {
		// Escape the value so characters such as '&' or '#' cannot inject
		// extra parameters or truncate the query string.
		langParam = "&lr=" + url.QueryEscape(lang)
	}

	glParam, uuleParam := getRandomGeoLocation()

	startIndex := (page - 1) * resultsPerPage
	if startIndex < 0 {
		startIndex = 0
	}

	return fmt.Sprintf("https://www.google.com/search?q=%s%s%s%s%s&start=%d",
		url.QueryEscape(query), safeParam, langParam, glParam, uuleParam, startIndex)
}
// getRandomGeoLocation pseudo-randomly picks one country code from a fixed
// list and returns it as a "&gl=<code>" query fragment. The second return
// value, the uule fragment, is always the empty string.
func getRandomGeoLocation() (string, string) {
	codes := [...]string{"us", "ca", "gb", "fr", "de", "au", "in", "jp", "br", "za"}
	pick := rand.Intn(len(codes))
	return "&gl=" + codes[pick], ""
}
// parseResults extracts search results from a parsed Google results page.
//
// Every element matching ".yuRUbf" yields one result: the href of the first
// anchor found inside it becomes the URL, and the trimmed text of the <h3>
// beneath its anchors becomes the header. Elements whose anchor has no href
// are reported through printDebug and skipped. The description is the text of
// the ".VwiC3b" element that has the same document-wide position as the
// ".yuRUbf" element, or "" when there is no element at that position.
//
// It returns a nil slice when nothing matched.
func parseResults(doc *goquery.Document) []TextSearchResult {
	var results []TextSearchResult

	// Query the description nodes once; the selection does not change between
	// iterations, so there is no need to re-scan the document per result.
	// NOTE(review): titles and descriptions are paired purely by position,
	// which assumes both selectors match in equal numbers and in the same
	// order. If a result has no snippet, later descriptions would shift.
	// Confirm against the current page markup.
	descriptions := doc.Find(".VwiC3b")

	doc.Find(".yuRUbf").Each(func(i int, s *goquery.Selection) {
		link := s.Find("a")
		href, exists := link.Attr("href")
		if !exists {
			printDebug("No href attribute found for result %d\n", i)
			return
		}

		header := link.Find("h3").Text()
		// NOTE(review): as written, TrimSuffix with an empty suffix is a no-op.
		// The suffix character may have been lost (the file is flagged as
		// containing ambiguous Unicode), so the call is kept unchanged.
		header = strings.TrimSpace(strings.TrimSuffix(header, ""))

		description := ""
		if descSelection := descriptions.Eq(i); descSelection.Length() > 0 {
			description = descSelection.Text()
		}

		results = append(results, TextSearchResult{
			URL:         href,
			Header:      header,
			Description: description,
		})
	})

	return results
}