work #5
14 changed files with 336 additions and 166 deletions
26
README.md
26
README.md
|
@ -20,15 +20,28 @@
|
|||
- [ ] Better name
|
||||
- [ ] LXC container
|
||||
- [ ] Docker container
|
||||
- [ ] Automatic updates
|
||||
- [ ] Scalable crawlers and webservers + load balancing
|
||||
|
||||
# Go Search Engine
|
||||
# Ocásek (Warp) Search Engine
|
||||
|
||||
A self-hosted [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engine) that respects privacy, contains no ads, and serves as a proxy/alternative to Google website.
|
||||
A self-hosted, private and anonymous [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engine) that aims to be more resource-efficient and scalable. Decentralized services are nice, but jumping between instances when one stops working for some reason is inconvenient. That's why this engine can do both: you can self-host it or use the [official instance](https://search.spitfirebrowser.com/).
|
||||
|
||||
## Comparison to other search engines
|
||||
|
||||
| Name | Works without JS | Privacy frontend redirect | Torrent results | API | No 3rd party libs | Scalable | Not Resource Hungry | Dynamic Page Loading |
|
||||
|------------|----------------------|---------------------------|-----------------|-----|-------------------|----------|---------------------------------------------|----------------------|
|
||||
| Whoogle | ✅ | ❓ Only host can set it | ❌ | ❌ | ❌ | ❌ | ❓ Moderate | ❓ Not specified |
|
||||
| Araa-Search| ❌ | ✅ | ✅ | ✅ | ❓ | ❌ | ❌ Very resource hungry | ❌ |
|
||||
| LibreY | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❓ Moderate | ❌ |
|
||||
| Ocásek | ✅ | ✅ | ✅ | ❌ | ✅ [1] | ✅ | ✅ about 20MiB at idle, 21MiB when searching| ✅ |
|
||||
|
||||
[1]: It does not rely on 3rd-party libs for web scraping like [Selenium](https://www.javatpoint.com/selenium-webdriver), but it uses other search instances like LibreX as a fallback.
|
||||
|
||||
## Features
|
||||
|
||||
- Text search using Google search results.
|
||||
- Image search using the Qwant API.
|
||||
- Text search using Google, Brave, DuckDuckGo and LibreX/Y search results.
|
||||
- Image search using the Qwant/Imgur.
|
||||
- Video search using Piped API.
|
||||
- Image viewing using proxy and direct links to image source pages for image searches.
|
||||
- Maps using OpenStreetMap
|
||||
|
@ -39,7 +52,8 @@ A self-hosted [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engin
|
|||
### Prerequisites
|
||||
|
||||
- Go (version 1.18 or higher recommended)
|
||||
- Access to the internet for fetching results from the Qwant API and Google
|
||||
- Git (unexpected)
|
||||
- Access to the internet for fetching results (even more unexpected)
|
||||
|
||||
### Running the Application
|
||||
|
||||
|
@ -49,3 +63,5 @@ cd Search
|
|||
chmod +x ./run.sh
|
||||
./run.sh
|
||||
```
|
||||
|
||||
*It's that easy!*
|
|
@ -6,7 +6,7 @@ import (
|
|||
|
||||
var (
|
||||
debugMode bool = true
|
||||
funcs = template.FuncMap{
|
||||
funcs = template.FuncMap{
|
||||
"sub": func(a, b int) int {
|
||||
return a - b
|
||||
},
|
||||
|
@ -15,10 +15,3 @@ var (
|
|||
},
|
||||
}
|
||||
)
|
||||
|
||||
func max(a, b int) int {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
|
|
@ -6,28 +6,31 @@ import (
|
|||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// PerformImgurImageSearch performs an image search on Imgur and returns the results
|
||||
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) {
|
||||
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
|
||||
startTime := time.Now() // Start the timer
|
||||
|
||||
var results []ImageSearchResult
|
||||
searchURL := buildImgurSearchURL(query, page)
|
||||
|
||||
resp, err := http.Get(searchURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||||
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
|
||||
doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) {
|
||||
|
@ -72,7 +75,9 @@ func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
})
|
||||
})
|
||||
|
||||
return results, nil
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
// scrapeImageFromImgurPage scrapes the image source from the Imgur page
|
||||
|
@ -130,12 +135,13 @@ func buildImgurSearchURL(query string, page int) string {
|
|||
}
|
||||
|
||||
// func main() {
|
||||
// results, err := PerformImgurImageSearch("cats", "true", "en", 1)
|
||||
// results, duration, err := PerformImgurImageSearch("cats", "true", "en", 1)
|
||||
// if err != nil {
|
||||
// fmt.Println("Error:", err)
|
||||
// return
|
||||
// }
|
||||
|
||||
// fmt.Printf("Search took: %v\n", duration)
|
||||
// for _, result := range results {
|
||||
// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n",
|
||||
// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height)
|
||||
|
|
|
@ -25,7 +25,9 @@ type QwantAPIResponse struct {
|
|||
}
|
||||
|
||||
// PerformQwantImageSearch performs an image search on Qwant and returns the results.
|
||||
func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, error) {
|
||||
func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
|
||||
startTime := time.Now() // Start the timer
|
||||
|
||||
const resultsPerPage = 50
|
||||
var offset int
|
||||
if page <= 1 {
|
||||
|
@ -53,29 +55,29 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
|
||||
req, err := http.NewRequest("GET", apiURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating request: %v", err)
|
||||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
ImageUserAgent, err := GetUserAgent("Image-Search")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", ImageUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var apiResp QwantAPIResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&apiResp); err != nil {
|
||||
return nil, fmt.Errorf("decoding response: %v", err)
|
||||
return nil, 0, fmt.Errorf("decoding response: %v", err)
|
||||
}
|
||||
|
||||
var results []ImageSearchResult
|
||||
|
@ -91,5 +93,7 @@ func PerformQwantImageSearch(query, safe, lang string, page int) ([]ImageSearchR
|
|||
})
|
||||
}
|
||||
|
||||
return results, nil
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
return results, duration, nil
|
||||
}
|
||||
|
|
72
images.go
72
images.go
|
@ -4,30 +4,17 @@ import (
|
|||
"fmt"
|
||||
"html/template"
|
||||
"log"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
imageEngines []imageEngine
|
||||
imageEngineLock sync.Mutex
|
||||
)
|
||||
|
||||
type imageEngine struct {
|
||||
Name string
|
||||
Func func(string, string, string, int) ([]ImageSearchResult, error)
|
||||
Weight int
|
||||
}
|
||||
var imageSearchEngines []SearchEngine
|
||||
|
||||
func init() {
|
||||
imageEngines = []imageEngine{
|
||||
{Name: "Qwant", Func: PerformQwantImageSearch, Weight: 1},
|
||||
{Name: "Imgur", Func: PerformImgurImageSearch, Weight: 2},
|
||||
imageSearchEngines = []SearchEngine{
|
||||
{Name: "Qwant", Func: wrapImageSearchFunc(PerformQwantImageSearch), Weight: 1},
|
||||
{Name: "Imgur", Func: wrapImageSearchFunc(PerformImgurImageSearch), Weight: 2},
|
||||
}
|
||||
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
}
|
||||
|
||||
func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) {
|
||||
|
@ -110,50 +97,45 @@ func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string
|
|||
|
||||
func fetchImageResults(query, safe, lang string, page int) []ImageSearchResult {
|
||||
var results []ImageSearchResult
|
||||
var err error
|
||||
|
||||
for attempts := 0; attempts < len(imageEngines); attempts++ {
|
||||
engine := selectImageEngine()
|
||||
for _, engine := range imageSearchEngines {
|
||||
log.Printf("Using image search engine: %s", engine.Name)
|
||||
|
||||
results, err = engine.Func(query, safe, lang, page)
|
||||
searchResults, duration, err := engine.Func(query, safe, lang, page)
|
||||
updateEngineMetrics(&engine, duration, err == nil)
|
||||
if err != nil {
|
||||
log.Printf("Error performing image search with %s: %v", engine.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, result := range searchResults {
|
||||
results = append(results, result.(ImageSearchResult))
|
||||
}
|
||||
|
||||
// If results are found, break out of the loop
|
||||
if len(results) > 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If no results found after trying all engines
|
||||
if len(results) == 0 {
|
||||
log.Printf("No image results found for query: %s", query)
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
func selectImageEngine() imageEngine {
|
||||
imageEngineLock.Lock()
|
||||
defer imageEngineLock.Unlock()
|
||||
|
||||
totalWeight := 0
|
||||
for _, engine := range imageEngines {
|
||||
totalWeight += engine.Weight
|
||||
}
|
||||
|
||||
randValue := rand.Intn(totalWeight)
|
||||
for _, engine := range imageEngines {
|
||||
if randValue < engine.Weight {
|
||||
// Adjust weights for load balancing
|
||||
for i := range imageEngines {
|
||||
if imageEngines[i].Name == engine.Name {
|
||||
imageEngines[i].Weight = max(1, imageEngines[i].Weight-1)
|
||||
} else {
|
||||
imageEngines[i].Weight++
|
||||
}
|
||||
}
|
||||
return engine
|
||||
func wrapImageSearchFunc(f func(string, string, string, int) ([]ImageSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) {
|
||||
return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) {
|
||||
imageResults, duration, err := f(query, safe, lang, page)
|
||||
if err != nil {
|
||||
return nil, duration, err
|
||||
}
|
||||
randValue -= engine.Weight
|
||||
searchResults := make([]SearchResult, len(imageResults))
|
||||
for i, result := range imageResults {
|
||||
searchResults[i] = result
|
||||
}
|
||||
return searchResults, duration, nil
|
||||
}
|
||||
|
||||
return imageEngines[0] // fallback to the first engine
|
||||
}
|
||||
|
|
8
run.sh
8
run.sh
|
@ -1,3 +1,7 @@
|
|||
#!/bin/bash
|
||||
#!/bin/sh
|
||||
|
||||
go run main.go common.go init.go open-search.go images.go imageproxy.go images-quant.go images-imgur.go video.go map.go text.go text-searchxng.go text-librex.go text-google.go cache.go forums.go files.go files-torrentgalaxy.go files-thepiratebay.go agent.go
|
||||
# Find all .go files in the current directory
|
||||
GO_FILES=$(find . -name '*.go' -print)
|
||||
|
||||
# Run the Go program
|
||||
go run $GO_FILES
|
||||
|
|
90
search-engine.go
Normal file
90
search-engine.go
Normal file
|
@ -0,0 +1,90 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
searchEngineLock sync.Mutex
|
||||
)
|
||||
|
||||
// SearchEngine struct now includes metrics for calculating reputation.
|
||||
type SearchEngine struct {
|
||||
Name string
|
||||
Func func(string, string, string, int) ([]SearchResult, time.Duration, error)
|
||||
Weight int
|
||||
TotalRequests int
|
||||
TotalTime time.Duration
|
||||
SuccessfulSearches int
|
||||
FailedSearches int
|
||||
}
|
||||
|
||||
// init seeds the global math/rand source from the current wall-clock time
// (nanosecond resolution).
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
|
||||
|
||||
// Selects a search engine based on weighted random selection with dynamic weighting.
|
||||
func selectSearchEngine(engines []SearchEngine) SearchEngine {
|
||||
searchEngineLock.Lock()
|
||||
defer searchEngineLock.Unlock()
|
||||
|
||||
// Recalculate weights based on average response time and success rate.
|
||||
for i := range engines {
|
||||
engines[i].Weight = calculateReputation(engines[i])
|
||||
}
|
||||
|
||||
totalWeight := 0
|
||||
for _, engine := range engines {
|
||||
totalWeight += engine.Weight
|
||||
}
|
||||
|
||||
randValue := rand.Intn(totalWeight)
|
||||
for _, engine := range engines {
|
||||
if randValue < engine.Weight {
|
||||
return engine
|
||||
}
|
||||
randValue -= engine.Weight
|
||||
}
|
||||
|
||||
return engines[0] // fallback to the first engine
|
||||
}
|
||||
|
||||
// Updates the engine's performance metrics.
|
||||
func updateEngineMetrics(engine *SearchEngine, responseTime time.Duration, success bool) {
|
||||
searchEngineLock.Lock()
|
||||
defer searchEngineLock.Unlock()
|
||||
|
||||
engine.TotalRequests++
|
||||
engine.TotalTime += responseTime
|
||||
if success {
|
||||
engine.SuccessfulSearches++
|
||||
} else {
|
||||
engine.FailedSearches++
|
||||
}
|
||||
engine.Weight = calculateReputation(*engine)
|
||||
}
|
||||
|
||||
// Calculates the reputation of the search engine based on average response time and success rate.
|
||||
func calculateReputation(engine SearchEngine) int {
|
||||
const referenceTime = time.Second // 1 second reference time in nanoseconds (1000 ms)
|
||||
|
||||
if engine.TotalRequests == 0 {
|
||||
return 10 // Default weight for new engines
|
||||
}
|
||||
|
||||
// Calculate average response time in seconds.
|
||||
avgResponseTime := engine.TotalTime.Seconds() / float64(engine.TotalRequests)
|
||||
|
||||
// Calculate success rate.
|
||||
successRate := float64(engine.SuccessfulSearches) / float64(engine.TotalRequests)
|
||||
|
||||
// Combine response time and success rate into a single reputation score.
|
||||
// The formula can be adjusted to weigh response time and success rate differently.
|
||||
reputation := (referenceTime.Seconds() / avgResponseTime) * successRate
|
||||
|
||||
// Scale reputation for better interpretability (e.g., multiply by 10)
|
||||
return int(reputation * 10)
|
||||
}
|
|
@ -47,7 +47,7 @@
|
|||
</form>
|
||||
<div class="results_settings">
|
||||
<form>
|
||||
<h1>Settings</h1>
|
||||
<h1>SETTINGS ARE NOT IMPLEMENTED YET</h1>
|
||||
<h2>Theme</h2>
|
||||
<label for="theme-dark">Dark Theme:</label>
|
||||
<input type="checkbox" class="results-settings" id="theme-dark" name="theme" value="dark"><br>
|
||||
|
|
|
@ -67,8 +67,10 @@
|
|||
</div>
|
||||
<br>
|
||||
{{end}}
|
||||
{{else}}
|
||||
{{else if .NoResults}}
|
||||
<div class="no-results">No results found for '{{ .Query }}'. Try different keywords.</div>
|
||||
{{else}}
|
||||
<div class="no-more-results">Looks like this is the end of results.</div>
|
||||
{{end}}
|
||||
</div>
|
||||
<div class="prev-next prev-img" id="prev-next">
|
||||
|
@ -84,26 +86,39 @@
|
|||
</form>
|
||||
</div>
|
||||
<script>
|
||||
document.getElementById('content').classList.remove('js-enabled');
|
||||
|
||||
document.addEventListener("DOMContentLoaded", function() {
|
||||
if (document.getElementById('prev-next')) {
|
||||
document.getElementById('prev-next').style.display = 'none';
|
||||
|
||||
let page = {{ .Page }};
|
||||
const query = "{{ .Query }}";
|
||||
let loading = false;
|
||||
let hasMoreResults = true;
|
||||
|
||||
function loadResults(newPage) {
|
||||
if (loading || !hasMoreResults) return;
|
||||
loading = true;
|
||||
fetch(`/search?q=${encodeURIComponent(query)}&t=text&p=${newPage}`)
|
||||
.then(response => response.text())
|
||||
.then(data => {
|
||||
const parser = new DOMParser();
|
||||
const doc = parser.parseFromString(data, 'text/html');
|
||||
const newResults = doc.getElementById('results').innerHTML;
|
||||
document.getElementById('results').innerHTML += newResults;
|
||||
page = newPage;
|
||||
const noResultsMessage = "No results found for '{{ .Query }}'. Try different keywords.";
|
||||
|
||||
if (newResults.includes(noResultsMessage)) {
|
||||
document.getElementById('results').innerHTML += "<div class='no-more-results'>Looks like this is the end of results.</div>";
|
||||
hasMoreResults = false;
|
||||
} else {
|
||||
document.getElementById('results').innerHTML += newResults;
|
||||
page = newPage;
|
||||
}
|
||||
loading = false;
|
||||
})
|
||||
.catch(error => console.error('Error loading results:', error));
|
||||
.catch(error => {
|
||||
console.error('Error loading results:', error);
|
||||
loading = false;
|
||||
});
|
||||
}
|
||||
|
||||
window.addEventListener('scroll', () => {
|
||||
|
|
78
text-brave.go
Normal file
78
text-brave.go
Normal file
|
@ -0,0 +1,78 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// PerformBraveTextSearch performs a text search on Brave and returns the results.
|
||||
func PerformBraveTextSearch(query, safe, lang string, offset int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now() // Start the timer
|
||||
var results []TextSearchResult
|
||||
|
||||
// Build the search URL
|
||||
searchURL := fmt.Sprintf("https://search.brave.com/search?q=%s&offset=%d", url.QueryEscape(query), offset)
|
||||
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("creating request: %v", err)
|
||||
}
|
||||
|
||||
// Set headers including User-Agent
|
||||
TextUserAgent, err := GetUserAgent("Text-Search")
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
req.Header.Set("User-Agent", TextUserAgent)
|
||||
|
||||
// Perform the HTTP request
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("performing request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Read the response body
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("reading response body: %v", err)
|
||||
}
|
||||
|
||||
// Parse the response body
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("parsing response body: %v", err)
|
||||
}
|
||||
|
||||
// Extract search results
|
||||
doc.Find(".snippet").Each(func(i int, s *goquery.Selection) {
|
||||
title := s.Find(".title").Text()
|
||||
description := s.Find(".snippet-description").Text()
|
||||
url, exists := s.Find("a").Attr("href")
|
||||
|
||||
// Add to results only if all components are present
|
||||
if title != "" && description != "" && exists && url != "" {
|
||||
results = append(results, TextSearchResult{
|
||||
Header: title,
|
||||
URL: url,
|
||||
Description: description,
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
// Return an error if no results are found
|
||||
if len(results) == 0 {
|
||||
return nil, duration, fmt.Errorf("no results found")
|
||||
}
|
||||
|
||||
return results, duration, nil
|
||||
}
|
|
@ -1,33 +1,34 @@
|
|||
// text-duckduckgo.go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||
func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now() // Start the timer
|
||||
|
||||
var results []TextSearchResult
|
||||
searchURL := buildDuckDuckGoSearchURL(query, page)
|
||||
|
||||
resp, err := http.Get(searchURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||||
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
|
||||
doc.Find(".result__body").Each(func(i int, s *goquery.Selection) {
|
||||
|
@ -46,15 +47,14 @@ func PerformDuckDuckGoTextSearch(query, safe, lang string, page int) ([]TextSear
|
|||
Description: strings.TrimSpace(description),
|
||||
}
|
||||
results = append(results, result)
|
||||
if debugMode {
|
||||
log.Printf("Processed DuckDuckGo result: %+v\n", result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
return results, nil
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
func buildDuckDuckGoSearchURL(query string, page int) string {
|
||||
|
|
|
@ -6,59 +6,59 @@ import (
|
|||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||
func PerformGoogleTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
const resultsPerPage = 10
|
||||
var results []TextSearchResult
|
||||
|
||||
startTime := time.Now() // Start the timer
|
||||
|
||||
client := &http.Client{}
|
||||
searchURL := buildSearchURL(query, safe, lang, page, resultsPerPage)
|
||||
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request: %v", err)
|
||||
return nil, 0, fmt.Errorf("failed to create request: %v", err)
|
||||
}
|
||||
|
||||
// User Agent generation
|
||||
TextUserAgent, err := GetUserAgent("Text-Search")
|
||||
if err != nil {
|
||||
fmt.Println("Error:", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if debugMode {
|
||||
fmt.Println("Generated User Agent (text):", TextUserAgent)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", TextUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %v", err)
|
||||
return nil, 0, fmt.Errorf("making request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading HTML document: %v", err)
|
||||
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
|
||||
}
|
||||
|
||||
results = parseResults(doc)
|
||||
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
if len(results) == 0 {
|
||||
if debugMode {
|
||||
log.Println("No results found from Google")
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
func buildSearchURL(query, safe, lang string, page, resultsPerPage int) string {
|
||||
|
@ -104,9 +104,6 @@ func parseResults(doc *goquery.Document) []TextSearchResult {
|
|||
Description: description,
|
||||
}
|
||||
results = append(results, result)
|
||||
if debugMode {
|
||||
log.Printf("Google result: %+v\n", result)
|
||||
}
|
||||
})
|
||||
|
||||
return results
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
const LIBREX_DOMAIN = "librex.antopie.org"
|
||||
|
@ -18,7 +19,8 @@ type LibreXResult struct {
|
|||
|
||||
type LibreXResponse []LibreXResult
|
||||
|
||||
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, error) {
|
||||
func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchResult, time.Duration, error) {
|
||||
startTime := time.Now() // Start the timer
|
||||
|
||||
// LibreX/Y uses offset instead of page that starts at 0
|
||||
page--
|
||||
|
@ -29,33 +31,29 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
|
|||
// User Agent generation
|
||||
userAgent, err := GetUserAgent("librex-text-search")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if debugMode {
|
||||
log.Println("Generated User Agent (text):", userAgent)
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, 0, err
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
|
||||
client := &http.Client{}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, logError("error making request to LibreX", err)
|
||||
return nil, 0, logError("error making request to LibreX", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode))
|
||||
return nil, 0, logError("unexpected status code", fmt.Errorf("%d", resp.StatusCode))
|
||||
}
|
||||
|
||||
var librexResp LibreXResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&librexResp); err != nil {
|
||||
return nil, logError("error decoding LibreX response", err)
|
||||
return nil, 0, logError("error decoding LibreX response", err)
|
||||
}
|
||||
|
||||
var results []TextSearchResult
|
||||
|
@ -70,9 +68,16 @@ func PerformLibreXTextSearch(query, safe, lang string, page int) ([]TextSearchRe
|
|||
results = append(results, result)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
duration := time.Since(startTime) // Calculate the duration
|
||||
|
||||
if len(results) == 0 {
|
||||
return nil, duration, fmt.Errorf("no results found")
|
||||
}
|
||||
|
||||
return results, duration, nil
|
||||
}
|
||||
|
||||
// This is just stupid; it will probably lead to printing the error twice
|
||||
func logError(message string, err error) error {
|
||||
log.Printf("%s: %v", message, err)
|
||||
return fmt.Errorf("%s: %w", message, err)
|
||||
|
|
74
text.go
74
text.go
|
@ -4,32 +4,20 @@ import (
|
|||
"fmt"
|
||||
"html/template"
|
||||
"log"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
searchEngines []searchEngine
|
||||
searchEngineLock sync.Mutex
|
||||
)
|
||||
|
||||
type searchEngine struct {
|
||||
Name string
|
||||
Func func(string, string, string, int) ([]TextSearchResult, error)
|
||||
Weight int
|
||||
}
|
||||
var textSearchEngines []SearchEngine
|
||||
|
||||
func init() {
|
||||
searchEngines = []searchEngine{
|
||||
{Name: "Google", Func: PerformGoogleTextSearch, Weight: 1},
|
||||
{Name: "LibreX", Func: PerformLibreXTextSearch, Weight: 2},
|
||||
// {Name: "DuckDuckGo", Func: PerformDuckDuckGoTextSearch, Weight: 3}, // DuckDuckGo timeouts too fast and search results are trash
|
||||
// {Name: "SearXNG", Func: PerformSearXNGTextSearch, Weight: 2}, // Uncomment when implemented
|
||||
textSearchEngines = []SearchEngine{
|
||||
{Name: "Google", Func: wrapTextSearchFunc(PerformGoogleTextSearch), Weight: 1},
|
||||
{Name: "LibreX", Func: wrapTextSearchFunc(PerformLibreXTextSearch), Weight: 2},
|
||||
{Name: "Brave", Func: wrapTextSearchFunc(PerformBraveTextSearch), Weight: 2},
|
||||
{Name: "DuckDuckGo", Func: wrapTextSearchFunc(PerformDuckDuckGoTextSearch), Weight: 5}, // DuckDuckGo timeouts too fast and search results are trash
|
||||
// {Name: "SearXNG", Func: wrapTextSearchFunc(PerformSearXNGTextSearch), Weight: 2}, // Uncomment when implemented
|
||||
}
|
||||
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
}
|
||||
|
||||
func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int) {
|
||||
|
@ -102,18 +90,22 @@ func prefetchPage(query, safe, lang string, page int) {
|
|||
|
||||
func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
|
||||
var results []TextSearchResult
|
||||
var err error
|
||||
|
||||
for attempts := 0; attempts < len(searchEngines); attempts++ {
|
||||
engine := selectSearchEngine()
|
||||
for _, engine := range textSearchEngines {
|
||||
log.Printf("Using search engine: %s", engine.Name)
|
||||
|
||||
results, err = engine.Func(query, safe, lang, page)
|
||||
searchResults, duration, err := engine.Func(query, safe, lang, page)
|
||||
updateEngineMetrics(&engine, duration, err == nil)
|
||||
if err != nil {
|
||||
log.Printf("Error performing search with %s: %v", engine.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, result := range searchResults {
|
||||
results = append(results, result.(TextSearchResult))
|
||||
}
|
||||
|
||||
// If results are found, break out of the loop
|
||||
if len(results) > 0 {
|
||||
break
|
||||
}
|
||||
|
@ -122,32 +114,18 @@ func fetchTextResults(query, safe, lang string, page int) []TextSearchResult {
|
|||
return results
|
||||
}
|
||||
|
||||
func selectSearchEngine() searchEngine {
|
||||
searchEngineLock.Lock()
|
||||
defer searchEngineLock.Unlock()
|
||||
|
||||
totalWeight := 0
|
||||
for _, engine := range searchEngines {
|
||||
totalWeight += engine.Weight
|
||||
}
|
||||
|
||||
randValue := rand.Intn(totalWeight)
|
||||
for _, engine := range searchEngines {
|
||||
if randValue < engine.Weight {
|
||||
// Adjust weights for load balancing
|
||||
for i := range searchEngines {
|
||||
if searchEngines[i].Name == engine.Name {
|
||||
searchEngines[i].Weight = max(1, searchEngines[i].Weight-1)
|
||||
} else {
|
||||
searchEngines[i].Weight++
|
||||
}
|
||||
}
|
||||
return engine
|
||||
func wrapTextSearchFunc(f func(string, string, string, int) ([]TextSearchResult, time.Duration, error)) func(string, string, string, int) ([]SearchResult, time.Duration, error) {
|
||||
return func(query, safe, lang string, page int) ([]SearchResult, time.Duration, error) {
|
||||
textResults, duration, err := f(query, safe, lang, page)
|
||||
if err != nil {
|
||||
return nil, duration, err
|
||||
}
|
||||
randValue -= engine.Weight
|
||||
searchResults := make([]SearchResult, len(textResults))
|
||||
for i, result := range textResults {
|
||||
searchResults[i] = result
|
||||
}
|
||||
return searchResults, duration, nil
|
||||
}
|
||||
|
||||
return searchEngines[0] // fallback to the first engine
|
||||
}
|
||||
|
||||
func displayResults(w http.ResponseWriter, results []TextSearchResult, query, lang string, elapsed float64, page int, hasPrevPage, hasNextPage bool) {
|
||||
|
@ -177,6 +155,7 @@ func displayResults(w http.ResponseWriter, results []TextSearchResult, query, la
|
|||
HasNextPage bool
|
||||
LanguageOptions []LanguageOption
|
||||
CurrentLang string
|
||||
NoResults bool
|
||||
}{
|
||||
Results: results,
|
||||
Query: query,
|
||||
|
@ -186,6 +165,7 @@ func displayResults(w http.ResponseWriter, results []TextSearchResult, query, la
|
|||
HasNextPage: hasNextPage,
|
||||
LanguageOptions: languageOptions,
|
||||
CurrentLang: lang,
|
||||
NoResults: len(results) == 0,
|
||||
}
|
||||
|
||||
err = tmpl.Execute(w, data)
|
||||
|
|
Loading…
Reference in a new issue