Compare commits

...

2 commits

Author SHA1 Message Date
partisan
326ae967bd added brave search 2024-06-15 18:12:01 +02:00
partisan
773992aeea automatic reputation for search engines 2024-06-14 17:56:20 +02:00
10 changed files with 277 additions and 127 deletions

View file

@ -6,28 +6,31 @@ import (
"net/url"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// PerformImgurImageSearch performs an image search on Imgur and returns the results
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) {
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
var results []ImageSearchResult
searchURL := buildImgurSearchURL(query, page)
resp, err := http.Get(searchURL)
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
return nil, 0, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err)
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
}
doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) {
@ -72,7 +75,9 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
})
})
return results, nil
duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
}
// scrapeImageFromImgurPage scrapes the image source from the Imgur page
@ -130,12 +135,13 @@ func buildImgurSearchURL(query string, page int) string {
}
// func main() {
// results, err := PerformImgurImageSearch("cats", "true", "en", 1)
// results, duration, err := PerformImgurImageSearch("cats", "true", "en", 1)
// if err != nil {
// fmt.Println("Error:", err)
// return
// }
// fmt.Printf("Search took: %v\n", duration)
// for _, result := range results {
// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n",
// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height)

View file

@ -25,7 +25,9 @@ type QwantAPIResponse struct {
}
// PerformQwantImageSearch performs an image search on Qwant and returns the results.
func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) {
func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
const resultsPerPage = 50
var offset int
if page <= 1 {
@ -53,29 +55,29 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
req, err := http.NewRequest("GET", apiURL, nil)
if err != nil {
return nil, fmt.Errorf("creating request: %v", err)
return nil, 0, fmt.Errorf("creating request: %v", err)
}
ImageUserAgent, err := GetUserAgent("Image-Search")
if err != nil {
return nil, err
return nil, 0, err
}
req.Header.Set("User-Agent", ImageUserAgent)
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
return nil, 0, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
var apiResp QwantAPIResponse
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
return nil, fmt.Errorf("decoding response: %v", err)
return nil, 0, fmt.Errorf("decoding response: %v", err)
}
var results []ImageSearchResult
@ -91,5 +93,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
})
}
return results, nil
duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
}

View file

@ -4,30 +4,17 @@ import (
"fmt"
"html/template"
"log"
"math/rand"
"net/http"
"sync"
"time"
)
var (
imageEngines []imageEngine
imageEngineLock sync.Mutex
)
type imageEngine struct {
Name string
Func func(string, string, string, int) ([]ImageSearchResult, error)
Weight int
}
var imageSearchEngines []SearchEngine
func init() {
imageEngines = []imageEngine{
{Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1},
{Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2},
imageSearchEngines = []SearchEngine{
{Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch), Weight: 1},
{Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 2},
}
rand.Seed(time.Now().UnixNano())
}
func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) {
@ -111,17 +98,24 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult {
var results []ImageSearchResult
var err error
var duration time.Duration
for attempts := 0; attempts < len(imageEngines); attempts++ {
engine := selectImageEngine()
for attempts := 0; attempts < len(imageSearchEngines); attempts++ {
engine := selectSearchEngine(imageSearchEngines)
log.Printf("Using image search engine: %s", engine.Name)
results, err = engine.Func(query, safe, lang, page)
var searchResults []SearchResult
searchResults, duration, err = engine.Func(query, safe, lang, page)
updateEngineMetrics(&engine, duration, err == nil)
if err != nil {
log.Printf("Error performing image search with %s: %v", engine.Name, err)
continue
}
for _, result := range searchResults {
results = append(results, result.(ImageSearchResult))
}
if len(results) > 0 {
break
}
@ -130,30 +124,16 @@ func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult {
return results
}
func selectImageEngine() imageEngine {
imageEngineLock.Lock()
defer imageEngineLock.Unlock()
totalWeight := 0
for _, engine := range imageEngines {
totalWeight += engine.Weight
func wrapImageSearchFunc(f func(string, string, string, int) ([]ImageSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) {
return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) {
imageResults, duration, err := f(query, safe, lang, page)
if err != nil {
return nil, duration, err
}
randValue := rand.Intn(totalWeight)
for _, engine := range imageEngines {
if randValue < engine.Weight {
// Adjust weights for load balancing
for i := range imageEngines {
if imageEngines[i].Name == engine.Name {
imageEngines[i].Weight = max(1, imageEngines[i].Weight-1)
} else {
imageEngines[i].Weight++
searchResults := make([]SearchResult, len(imageResults))
for i, result := range imageResults {
searchResults[i] = result
}
return searchResults, duration, nil
}
return engine
}
randValue -= engine.Weight
}
return imageEngines[0] // fallback to the first engine
}

8
run.sh
View file

@ -1,3 +1,7 @@
#!/bin/bash
#!/bin/sh
go run main.go common.go init.go open-search.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go
# Find all .go files in the current directory
GO_FILES=$(find . -name '*.go' -print)
# Run the Go program
go run $GO_FILES

90
search-engine.go Normal file
View file

@ -0,0 +1,90 @@
package main
import (
"math/rand"
"sync"
"time"
)
// searchEngineLock is held by selectSearchEngine while it recalculates weights
// and picks an engine, and by updateEngineMetrics while it records a request.
var (
searchEngineLock sync.Mutex
)
// SearchEngine describes one search backend together with the counters that
// calculateReputation turns into its selection weight.
type SearchEngine struct {
// Name is the label callers print when logging which engine was used.
Name string
// Func runs the search. Callers invoke it as Func(query, safe, lang, page)
// and receive the results, the time the request took, and an error.
Func func(string, string, string, int) ([]SearchResult, time.Duration, error)
// Weight is the share this engine gets in the weighted random selection.
// It is overwritten with calculateReputation's result by both
// selectSearchEngine and updateEngineMetrics.
Weight int
// TotalRequests counts every call recorded through updateEngineMetrics.
TotalRequests int
// TotalTime is the sum of the response times passed to updateEngineMetrics.
TotalTime time.Duration
// SuccessfulSearches counts recorded calls that were reported as successful.
SuccessfulSearches int
// FailedSearches counts recorded calls that were reported as failed.
FailedSearches int
}
// init seeds the global math/rand source from the current wall-clock time so
// that the weighted engine selection differs between program runs.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
// selectSearchEngine picks one engine from engines by weighted random choice.
//
// Every engine's Weight is first refreshed in place from calculateReputation,
// then an engine is drawn with probability proportional to its weight.
// Engines whose weight is zero or negative are never drawn while at least one
// engine has a positive weight. If no engine has a positive weight (for
// example, every engine has only failed so far), the choice falls back to a
// uniform draw so that a request can still be attempted instead of panicking
// in rand.Intn, which requires a positive argument.
//
// An empty engines slice yields the zero SearchEngine (empty Name, nil Func).
//
// The returned value is a copy of the slice element: modifying it, or passing
// a pointer to it elsewhere, does not change the entry stored in engines.
func selectSearchEngine(engines []SearchEngine) SearchEngine {
	searchEngineLock.Lock()
	defer searchEngineLock.Unlock()

	if len(engines) == 0 {
		return SearchEngine{}
	}

	// Recalculate weights from average response time and success rate, and
	// sum only the positive ones so the draw range matches the selectable set.
	totalWeight := 0
	for i := range engines {
		engines[i].Weight = calculateReputation(engines[i])
		if engines[i].Weight > 0 {
			totalWeight += engines[i].Weight
		}
	}

	// No engine currently has a usable weight: choose uniformly.
	if totalWeight <= 0 {
		return engines[rand.Intn(len(engines))]
	}

	randValue := rand.Intn(totalWeight)
	for i := range engines {
		weight := engines[i].Weight
		if weight <= 0 {
			continue
		}
		if randValue < weight {
			return engines[i]
		}
		randValue -= weight
	}

	return engines[0] // fallback to the first engine
}
// updateEngineMetrics records the outcome of one search request on engine.
//
// It bumps the success or failure counter according to success, adds
// responseTime to the accumulated total, counts the request, and finally
// refreshes engine.Weight from calculateReputation. The whole update runs
// under searchEngineLock. Only the struct that engine points to is modified.
func updateEngineMetrics(engine *SearchEngine, responseTime time.Duration, success bool) {
	searchEngineLock.Lock()
	defer searchEngineLock.Unlock()

	switch {
	case success:
		engine.SuccessfulSearches += 1
	default:
		engine.FailedSearches += 1
	}
	engine.TotalTime = engine.TotalTime + responseTime
	engine.TotalRequests += 1

	// The weight is derived from the counters above, so compute it last.
	engine.Weight = calculateReputation(*engine)
}
// calculateReputation converts an engine's recorded metrics into an integer
// selection weight.
//
// The score is (referenceTime / average response time) * success rate,
// multiplied by 10 and truncated to an int. An engine that answers in one
// second on average and never fails therefore scores 10; faster engines score
// higher, slower or less reliable engines score lower.
//
// Special cases:
//   - no recorded requests: the default weight 10 is returned;
//   - no successful searches: 0 is returned;
//   - recorded successes but a non-positive average response time: only the
//     success rate is scored (success rate * 10), because dividing by a zero
//     average would produce +Inf or NaN, and converting those to int gives an
//     implementation-dependent result.
func calculateReputation(engine SearchEngine) int {
	const (
		referenceTime = time.Second // average response time that maps to a neutral factor of 1
		defaultWeight = 10          // weight for engines without any recorded request
		scale         = 10          // multiplier applied before truncating to int
	)

	if engine.TotalRequests <= 0 {
		return defaultWeight
	}
	if engine.SuccessfulSearches <= 0 {
		// A zero success rate makes the product zero regardless of timing.
		return 0
	}

	requests := float64(engine.TotalRequests)
	successRate := float64(engine.SuccessfulSearches) / requests

	// Average response time in seconds.
	avgResponseTime := engine.TotalTime.Seconds() / requests
	if avgResponseTime <= 0 {
		// No usable timing information: score on success rate alone.
		return int(successRate * scale)
	}

	// Combine response time and success rate into a single reputation score.
	reputation := (referenceTime.Seconds() / avgResponseTime) * successRate
	return int(reputation * scale)
}

73
text-brave.go Normal file
View file

@ -0,0 +1,73 @@
package main
import (
"fmt"
"io/ioutil"
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// PerformBraveTextSearch performs a text search on Brave and returns the results.
//
// It requests https://search.brave.com/search with the URL-escaped query and
// the given offset, using the User-Agent obtained from
// GetUserAgent("Text-Search"), and parses the returned HTML: every ".snippet"
// element that has a non-empty title, description and link becomes one
// TextSearchResult.
//
// safe and lang are accepted so the function matches the signature shared by
// the other text engines; they are not used to build the request.
//
// NOTE(review): the shared engine signature passes a page number in the last
// argument; confirm that this is the value Brave expects for "offset".
//
// On success it returns the results together with the time elapsed since the
// function was entered. On any failure, including a non-200 HTTP status, it
// returns nil results, a zero duration and a non-nil error.
func PerformBraveTextSearch(query, safe, lang string, offset int) ([]TextSearchResult, time.Duration, error) {
	startTime := time.Now() // Start the timer

	// Build the search URL
	searchURL := fmt.Sprintf("https://search.brave.com/search?q=%s&offset=%d", url.QueryEscape(query), offset)

	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		return nil, 0, fmt.Errorf("creating request: %w", err)
	}

	// Set headers including User-Agent
	userAgent, err := GetUserAgent("Text-Search")
	if err != nil {
		return nil, 0, err
	}
	req.Header.Set("User-Agent", userAgent)

	// Perform the HTTP request
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return nil, 0, fmt.Errorf("performing request: %w", err)
	}
	defer resp.Body.Close()

	// A non-200 reply (rate limit, captcha, server error) is a failed search,
	// not an empty result set; report it like the other engines do.
	if resp.StatusCode != http.StatusOK {
		return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Read the response body
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, 0, fmt.Errorf("reading response body: %w", err)
	}

	// Parse the response body
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
	if err != nil {
		return nil, 0, fmt.Errorf("parsing response body: %w", err)
	}

	// Extract search results
	var results []TextSearchResult
	doc.Find(".snippet").Each(func(_ int, s *goquery.Selection) {
		// Trim surrounding markup whitespace so blank-only fields count as missing.
		title := strings.TrimSpace(s.Find(".title").Text())
		description := strings.TrimSpace(s.Find(".snippet-description").Text())
		// Named link (not url) so the net/url package is not shadowed.
		link, exists := s.Find("a").Attr("href")

		// Add to results only if all components are present
		if !exists || title == "" || description == "" || link == "" {
			return
		}
		results = append(results, TextSearchResult{
			Header:      title,
			URL:         link,
			Description: description,
		})
	})

	duration := time.Since(startTime) // Calculate the duration
	return results, duration, nil
}

View file

@ -1,4 +1,3 @@
// text-duckduckgo.go
package main
import (
@ -7,27 +6,30 @@ import (
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
var results []TextSearchResult
searchURL := buildDuckDuckGoSearchURL(query, page)
resp, err := http.Get(searchURL)
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
return nil, 0, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err)
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
}
doc.Find(".result__body").Each(func(i int, s *goquery.Selection) {
@ -54,7 +56,9 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear
}
})
return results, nil
duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
}
func buildDuckDuckGoSearchURL(query string, page int) string {

View file

@ -6,27 +6,29 @@ import (
"net/http"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
const resultsPerPage = 10
var results []TextSearchResult
startTime := time.Now() // Start the timer
client := &http.Client{}
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %v", err)
return nil, 0, fmt.Errorf("failed to create request: %v", err)
}
// User Agent generation
TextUserAgent, err := GetUserAgent("Text-Search")
if err != nil {
fmt.Println("Error:", err)
return nil, err
return nil, 0, err
}
if debugMode {
@ -37,28 +39,30 @@ func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchRe
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("making request: %v", err)
return nil, 0, fmt.Errorf("making request: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, fmt.Errorf("loading HTML document: %v", err)
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
}
results = parseResults(doc)
duration := time.Since(startTime) // Calculate the duration
if len(results) == 0 {
if debugMode {
log.Println("No results found from Google")
}
}
return results, nil
return results, duration, nil
}
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {

View file

@ -6,6 +6,7 @@ import (
"log"
"net/http"
"net/url"
"time"
)
const LIBREX_DOMAIN = "librex.antopie.org"
@ -18,7 +19,8 @@ type LibreXResult struct {
type LibreXResponse []LibreXResult
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
// LibreX/Y uses offset instead of page that starts at 0
page--
@ -29,7 +31,7 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
// User Agent generation
userAgent, err := GetUserAgent("librex-text-search")
if err != nil {
return nil, err
return nil, 0, err
}
if debugMode {
@ -38,24 +40,24 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
req, err := http.NewRequest("GET", searchURL, nil)
if err != nil {
return nil, err
return nil, 0, err
}
req.Header.Set("User-Agent", userAgent)
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return nil, logError("error making request to LibreX", err)
return nil, 0, logError("error making request to LibreX", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode))
return nil, 0, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode))
}
var librexResp LibreXResponse
if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil {
return nil, logError("error decoding LibreX response", err)
return nil, 0, logError("error decoding LibreX response", err)
}
var results []TextSearchResult
@ -70,7 +72,9 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
results = append(results, result)
}
return results, nil
duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
}
func logError(message string, err error) error {

71
text.go
View file

@ -4,32 +4,20 @@ import (
"fmt"
"html/template"
"log"
"math/rand"
"net/http"
"sync"
"time"
)
var (
searchEngines []searchEngine
searchEngineLock sync.Mutex
)
type searchEngine struct {
Name string
Func func(string, string, string, int) ([]TextSearchResult, error)
Weight int
}
var textSearchEngines []SearchEngine
func init() {
searchEngines = []searchEngine{
{Name: "Google", Func: PerformGoogleTextSearch, Weight: 1},
{Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2},
// {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash
// {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented
textSearchEngines = []SearchEngine{
{Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch), Weight: 1},
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch), Weight: 2},
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch), Weight: 2},
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch), Weight: 5}, // DuckDuckGo timeouts too fast and search results are trash
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXNGTextSearch), Weight: 2}, // Uncomment when implemented
}
rand.Seed(time.Now().UnixNano())
}
func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) {
@ -103,17 +91,24 @@ func prefetchPage(query, safe, lang string, page int) {
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
var results []TextSearchResult
var err error
var duration time.Duration
for attempts := 0; attempts < len(searchEngines); attempts++ {
engine := selectSearchEngine()
for attempts := 0; attempts < len(textSearchEngines); attempts++ {
engine := selectSearchEngine(textSearchEngines)
log.Printf("Using search engine: %s", engine.Name)
results, err = engine.Func(query, safe, lang, page)
var searchResults []SearchResult
searchResults, duration, err = engine.Func(query, safe, lang, page)
updateEngineMetrics(&engine, duration, err == nil)
if err != nil {
log.Printf("Error performing search with %s: %v", engine.Name, err)
continue
}
for _, result := range searchResults {
results = append(results, result.(TextSearchResult))
}
if len(results) > 0 {
break
}
@ -122,32 +117,18 @@ func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
return results
}
func selectSearchEngine() searchEngine {
searchEngineLock.Lock()
defer searchEngineLock.Unlock()
totalWeight := 0
for _, engine := range searchEngines {
totalWeight += engine.Weight
func wrapTextSearchFunc(f func(string, string, string, int) ([]TextSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) {
return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) {
textResults, duration, err := f(query, safe, lang, page)
if err != nil {
return nil, duration, err
}
randValue := rand.Intn(totalWeight)
for _, engine := range searchEngines {
if randValue < engine.Weight {
// Adjust weights for load balancing
for i := range searchEngines {
if searchEngines[i].Name == engine.Name {
searchEngines[i].Weight = max(1, searchEngines[i].Weight-1)
} else {
searchEngines[i].Weight++
searchResults := make([]SearchResult, len(textResults))
for i, result := range textResults {
searchResults[i] = result
}
return searchResults, duration, nil
}
return engine
}
randValue -= engine.Weight
}
return searchEngines[0] // fallback to the first engine
}
func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {