Search/text-duckduckgo.go

128 lines
3.1 KiB
Go
Raw Normal View History

2024-05-17 23:59:29 +00:00
package main
import (
"fmt"
"log"
"net/http"
"net/url"
"strings"
"time"
2024-05-17 23:59:29 +00:00
"github.com/PuerkitoBio/goquery"
)
2024-05-21 10:15:31 +00:00
// resultsPerPage is the page size used to turn a 1-based page number into
// the result offset sent to DuckDuckGo as the "s" query parameter.
const resultsPerPage = 10
// getVQD requests the DuckDuckGo landing page for query and extracts the
// value of the vqd="..." assignment found in the page's <script> elements.
//
// It returns an error when the request fails, when the response body cannot
// be parsed as HTML, or when no script yields a non-empty vqd value.
func getVQD(query string) (string, error) {
	queryURL := fmt.Sprintf("https://duckduckgo.com/?q=%s", url.QueryEscape(query))

	// http.Get goes through http.DefaultClient, which has no timeout; use a
	// bounded client so a stalled connection cannot block the caller forever.
	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Get(queryURL)
	if err != nil {
		return "", fmt.Errorf("failed to fetch vqd: %w", err)
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return "", fmt.Errorf("loading HTML document: %w", err)
	}

	const marker = `vqd="`

	var vqd string
	doc.Find("script").Each(func(_ int, s *goquery.Selection) {
		text := s.Text()

		start := strings.Index(text, marker)
		if start < 0 {
			return
		}
		start += len(marker)

		end := strings.Index(text[start:], `"`)
		if end < 0 {
			// Opening marker without a closing quote: slicing with
			// start+end would panic, so skip this script instead.
			return
		}

		vqd = text[start : start+end]
	})

	if vqd == "" {
		return "", fmt.Errorf("vqd not found")
	}
	return vqd, nil
}
func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
2024-05-17 23:59:29 +00:00
var results []TextSearchResult
client := &http.Client{Timeout: 10 * time.Second}
2024-05-21 10:15:31 +00:00
vqd, err := getVQD(query)
if err != nil {
return nil, fmt.Errorf("failed to get vqd: %v", err)
}
searchURL := fmt.Sprintf("https://duckduckgo.com/html/?q=%s&kl=%s&safe=%s&s=%d&vqd=%s",
url.QueryEscape(query), lang, safe, (page-1)*resultsPerPage, vqd)
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %v", err)
}
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36")
resp, err := client.Do(req)
2024-05-17 23:59:29 +00:00
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err)
}
doc.Find(".result__body").Each(func(i int, s *goquery.Selection) {
header := s.Find(".result__a").Text()
description := s.Find(".result__snippet").Text()
rawURL, exists := s.Find(".result__a").Attr("href")
if exists {
parsedURL, err := url.Parse(rawURL)
if err == nil {
queryParams := parsedURL.Query()
uddg := queryParams.Get("uddg")
if uddg != "" {
result := TextSearchResult{
URL: uddg,
Header: strings.TrimSpace(header),
Description: strings.TrimSpace(description),
2024-05-21 10:15:31 +00:00
Source: "DuckDuckGo",
2024-05-17 23:59:29 +00:00
}
results = append(results, result)
if debugMode {
log.Printf("Processed DuckDuckGo result: %+v\n", result)
}
} else {
if debugMode {
log.Printf("Missing 'uddg' parameter in URL: %s\n", rawURL)
}
}
} else {
if debugMode {
log.Printf("Error parsing URL: %s, error: %v\n", rawURL, err)
2024-05-17 23:59:29 +00:00
}
}
} else {
if debugMode {
log.Printf("Missing 'href' attribute in result anchor tag\n")
}
2024-05-17 23:59:29 +00:00
}
})
if len(results) == 0 {
if debugMode {
log.Println("No results found from DuckDuckGo")
}
}
2024-05-17 23:59:29 +00:00
return results, nil
}