caching for "files" and images + moved search result types

This commit is contained in:
partisan 2024-05-24 14:07:16 +02:00
parent bc6046db03
commit 9eb763bdf0
8 changed files with 257 additions and 140 deletions

View file

@ -17,6 +17,7 @@
- [ ] JS applets for results (such as calculator) - [ ] JS applets for results (such as calculator)
- [ ] Dynamic results loading as user scrolls - [ ] Dynamic results loading as user scrolls
- [ ] Replace fonts, replace icons font for SVG or remove unnecessary icons for faster loading - [ ] Replace fonts, replace icons font for SVG or remove unnecessary icons for faster loading
- [ ] Better name
- [ ] LXC container - [ ] LXC container
- [ ] Docker container - [ ] Docker container
@ -30,6 +31,8 @@ A self-hosted [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engin
- Image search using the Qwant API. - Image search using the Qwant API.
- Video search using Piped API. - Video search using Piped API.
- Image viewing using proxy and direct links to image source pages for image searches. - Image viewing using proxy and direct links to image source pages for image searches.
- Maps using OpenStreetMap
- Files download using torrent sites.
## Getting Started ## Getting Started
@ -46,18 +49,3 @@ cd Search
chmod +x ./run.sh chmod +x ./run.sh
./run.sh ./run.sh
``` ```
## Project Structure
- `main.go`: The entry point of the application, setting up the web server and routing.
- `text.go`: Handles text search requests, fetching results from Google and processing them for display.
- `images.go`: Contains logic for handling image search requests, including fetching data from the Qwant API and preparing it for the template.
- `imageproxy.go`: Part of the images.go search logic; handles image results and displays them via the proxy.
- `/templates`: Directory containing HTML templates for rendering the search interface and results.
- `search.html`: The main search page template.
- `text.html`: Template for displaying text search results.
- `images.html`: Template for displaying image search results.
- `videos.html`: Template for displaying video search results.
- `/static/css`: Directory for CSS stylesheets.
- `style.css`: The main stylesheet for the search interface and results.
- `/static/fonts`: Directory for fonts and icons (icons provided as a font).

102
cache.go
View file

@ -1,3 +1,4 @@
// common_cache.go
package main package main
import ( import (
@ -6,7 +7,12 @@ import (
"time" "time"
) )
// TextSearchResult represents a single search result item. var resultsCache = NewResultsCache(6 * time.Hour) // Cache with 6-hour expiration
// SearchResult is a generic interface for all types of search results.
type SearchResult interface{}
// Define various search result types implementing SearchResult interface
type TextSearchResult struct { type TextSearchResult struct {
URL string URL string
Header string Header string
@ -14,17 +20,59 @@ type TextSearchResult struct {
Source string Source string
} }
type ImageSearchResult struct {
Thumbnail string
Title string
Media string
Width int
Height int
Source string
ThumbProxy string
}
type VideoResult struct {
Href string
Title string
Date string
Views string
Creator string
Publisher string
Image string
Duration string
}
type TorrentResult struct {
URL string
Seeders int
Leechers int
Magnet string
Views int
Size string
Title string
Error string
}
type ForumSearchResult struct {
URL string `json:"url"`
Header string `json:"header"`
Description string `json:"description"`
PublishedDate time.Time `json:"publishedDate"`
ImgSrc string `json:"imgSrc,omitempty"`
ThumbnailSrc string `json:"thumbnailSrc,omitempty"`
}
// CacheKey represents the key used to store search results in the cache. // CacheKey represents the key used to store search results in the cache.
type CacheKey struct { type CacheKey struct {
Query string Query string
Page int Page int
Safe string Safe bool
Lang string Lang string
Type string
} }
// CachedItem represents an item stored in the cache with an expiration time. // CachedItem represents an item stored in the cache with an expiration time.
type CachedItem struct { type CachedItem struct {
Results []TextSearchResult Results []SearchResult
StoredTime time.Time StoredTime time.Time
} }
@ -44,7 +92,7 @@ func NewResultsCache(expiration time.Duration) *ResultsCache {
} }
// Get retrieves the results for a given key from the cache. // Get retrieves the results for a given key from the cache.
func (rc *ResultsCache) Get(key CacheKey) ([]TextSearchResult, bool) { func (rc *ResultsCache) Get(key CacheKey) ([]SearchResult, bool) {
rc.mu.Lock() rc.mu.Lock()
defer rc.mu.Unlock() defer rc.mu.Unlock()
@ -63,7 +111,7 @@ func (rc *ResultsCache) Get(key CacheKey) ([]TextSearchResult, bool) {
} }
// Set stores the results for a given key in the cache. // Set stores the results for a given key in the cache.
func (rc *ResultsCache) Set(key CacheKey, results []TextSearchResult) { func (rc *ResultsCache) Set(key CacheKey, results []SearchResult) {
rc.mu.Lock() rc.mu.Lock()
defer rc.mu.Unlock() defer rc.mu.Unlock()
rc.results[rc.keyToString(key)] = CachedItem{ rc.results[rc.keyToString(key)] = CachedItem{
@ -74,5 +122,47 @@ func (rc *ResultsCache) Set(key CacheKey, results []TextSearchResult) {
// keyToString converts a CacheKey to a string representation. // keyToString converts a CacheKey to a string representation.
func (rc *ResultsCache) keyToString(key CacheKey) string { func (rc *ResultsCache) keyToString(key CacheKey) string {
return fmt.Sprintf("%s|%d|%s|%s", key.Query, key.Page, key.Safe, key.Lang) return fmt.Sprintf("%s|%d|%t|%s|%s", key.Query, key.Page, key.Safe, key.Lang, key.Type)
}
// Helper functions to convert between generic SearchResult and specific ImageSearchResult
func convertToSearchResults(results interface{}) []SearchResult {
switch res := results.(type) {
case []TextSearchResult:
genericResults := make([]SearchResult, len(res))
for i, r := range res {
genericResults[i] = r
}
return genericResults
case []TorrentResult:
genericResults := make([]SearchResult, len(res))
for i, r := range res {
genericResults[i] = r
}
return genericResults
case []ImageSearchResult:
genericResults := make([]SearchResult, len(res))
for i, r := range res {
genericResults[i] = r
}
return genericResults
}
return nil
}
func convertToSpecificResults(results []SearchResult) ([]TextSearchResult, []TorrentResult, []ImageSearchResult) {
var textResults []TextSearchResult
var torrentResults []TorrentResult
var imageResults []ImageSearchResult
for _, r := range results {
switch res := r.(type) {
case TextSearchResult:
textResults = append(textResults, res)
case TorrentResult:
torrentResults = append(torrentResults, res)
case ImageSearchResult:
imageResults = append(imageResults, res)
}
}
return textResults, torrentResults, imageResults
} }

124
files.go
View file

@ -21,17 +21,6 @@ type TorrentSite interface {
Search(query string, category string) ([]TorrentResult, error) Search(query string, category string) ([]TorrentResult, error)
} }
type TorrentResult struct {
URL string
Seeders int
Leechers int
Magnet string
Views int
Size string
Title string
Error string
}
var ( var (
torrentGalaxy TorrentSite torrentGalaxy TorrentSite
nyaa TorrentSite nyaa TorrentSite
@ -49,35 +38,10 @@ func initializeTorrentSites() {
func handleFileSearch(w http.ResponseWriter, query, safe, lang string, page int) { func handleFileSearch(w http.ResponseWriter, query, safe, lang string, page int) {
startTime := time.Now() startTime := time.Now()
settings := Settings{UxLang: lang, Safe: safe} cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "file"}
sites := []TorrentSite{torrentGalaxy, nyaa, thePirateBay, rutor} combinedResults := getFileResultsFromCacheOrFetch(cacheKey, query, safe, lang, page)
results := []TorrentResult{}
allErrors := true
for _, site := range sites { sort.Slice(combinedResults, func(i, j int) bool { return combinedResults[i].Seeders > combinedResults[j].Seeders })
if site == nil {
continue
}
res, err := site.Search(query, "all")
if err != nil {
continue
}
if len(res) > 0 {
allErrors = false
}
for _, r := range res {
r.Magnet = url.QueryEscape(removeMagnetLink(r.Magnet)) // Remove "magnet:" and encode url
results = append(results, r)
}
}
if allErrors || len(results) == 0 || results[len(results)-1].Title == "" || results[len(results)-1].Title == " " {
results = []TorrentResult{
{Error: "Results are currently unavailable, sorry. Please try again later."},
}
}
sort.Slice(results, func(i, j int) bool { return results[i].Seeders > results[j].Seeders })
elapsedTime := time.Since(startTime) elapsedTime := time.Since(startTime)
funcMap := template.FuncMap{ funcMap := template.FuncMap{
@ -102,19 +66,19 @@ func handleFileSearch(w http.ResponseWriter, query, safe, lang string, page int)
Page int Page int
Settings Settings Settings Settings
}{ }{
Results: results, Results: combinedResults,
Query: query, Query: query,
Fetched: fmt.Sprintf("%.2f", elapsedTime.Seconds()), Fetched: fmt.Sprintf("%.2f", elapsedTime.Seconds()),
Category: "all", Category: "all",
Sort: "seed", Sort: "seed",
HasPrevPage: page > 1, HasPrevPage: page > 1,
HasNextPage: len(results) > 0, HasNextPage: len(combinedResults) > 0,
Page: page, Page: page,
Settings: settings, Settings: Settings{UxLang: lang, Safe: safe},
} }
// Debugging: Print results before rendering template // Debugging: Print results before rendering template
for _, result := range results { for _, result := range combinedResults {
fmt.Printf("Title: %s, Magnet: %s\n", result.Title, result.Magnet) fmt.Printf("Title: %s, Magnet: %s\n", result.Title, result.Magnet)
} }
@ -124,13 +88,75 @@ func handleFileSearch(w http.ResponseWriter, query, safe, lang string, page int)
} }
} }
//this is so fucking stupid, but it does not work otherwise func getFileResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page int) []TorrentResult {
func removeMagnetLink(magnet string) string { cacheChan := make(chan []SearchResult)
// Remove the magnet: prefix if it exists var combinedResults []TorrentResult
if strings.HasPrefix(magnet, "magnet:?") {
magnet = strings.TrimPrefix(magnet, "magnet:?") go func() {
results, exists := resultsCache.Get(cacheKey)
if exists {
log.Println("Cache hit")
cacheChan <- results
} else {
log.Println("Cache miss")
cacheChan <- nil
}
}()
select {
case results := <-cacheChan:
if results == nil {
combinedResults = fetchAndCacheFileResults(query, safe, lang, page)
} else {
_, torrentResults, _ := convertToSpecificResults(results)
combinedResults = torrentResults
}
case <-time.After(2 * time.Second):
log.Println("Cache check timeout")
combinedResults = fetchAndCacheFileResults(query, safe, lang, page)
} }
return magnet
return combinedResults
}
func fetchAndCacheFileResults(query, safe, lang string, page int) []TorrentResult {
sites := []TorrentSite{torrentGalaxy, nyaa, thePirateBay, rutor}
results := []TorrentResult{}
allErrors := true
for _, site := range sites {
if site == nil {
continue
}
res, err := site.Search(query, "all")
if err != nil {
continue
}
if len(res) > 0 {
allErrors = false
}
for _, r := range res {
r.Magnet = url.QueryEscape(removeMagnetLink(r.Magnet)) // Remove "magnet:" and encode url
results = append(results, r)
}
}
if allErrors || len(results) == 0 || results[len(results)-1].Title == "" || results[len(results)-1].Title == " " {
return []TorrentResult{
{Error: "Results are currently unavailable, sorry. Please try again later."},
}
}
// Cache the valid results
cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "file"}
resultsCache.Set(cacheKey, convertToSearchResults(results))
return results
}
func removeMagnetLink(magnet string) string {
// Remove the magnet: prefix unconditionally
return strings.TrimPrefix(magnet, "magnet:?")
} }
func subtract(a, b int) int { func subtract(a, b int) int {

View file

@ -10,15 +10,6 @@ import (
"time" "time"
) )
type ForumSearchResult struct {
URL string `json:"url"`
Header string `json:"header"`
Description string `json:"description"`
PublishedDate time.Time `json:"publishedDate"`
ImgSrc string `json:"imgSrc,omitempty"`
ThumbnailSrc string `json:"thumbnailSrc,omitempty"`
}
func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResult, error) { func PerformRedditSearch(query string, safe string, page int) ([]ForumSearchResult, error) {
const ( const (
pageSize = 25 pageSize = 25

2
go.mod
View file

@ -3,7 +3,7 @@ module searchengine
go 1.18 go 1.18
require ( require (
github.com/PuerkitoBio/goquery v1.9.1 // indirect github.com/PuerkitoBio/goquery v1.9.1 // direct
github.com/andybalholm/cascadia v1.3.2 // indirect github.com/andybalholm/cascadia v1.3.2 // indirect
golang.org/x/net v0.21.0 // indirect golang.org/x/net v0.21.0 // indirect
) )

View file

@ -10,17 +10,6 @@ import (
"time" "time"
) )
// ImageSearchResult represents a single image result
type ImageSearchResult struct {
Thumbnail string
Title string
Media string
Width int
Height int
Source string
ThumbProxy string
}
// QwantAPIResponse represents the JSON response structure from Qwant API // QwantAPIResponse represents the JSON response structure from Qwant API
type QwantAPIResponse struct { type QwantAPIResponse struct {
Data struct { Data struct {
@ -116,14 +105,12 @@ func fetchImageResults(query string, safe, lang string, page int) ([]ImageSearch
// HandleImageSearch is the HTTP handler for image search requests // HandleImageSearch is the HTTP handler for image search requests
func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) { func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int) {
results, err := fetchImageResults(query, safe, lang, page) startTime := time.Now()
if err != nil {
log.Printf("Error performing image search: %v", err)
http.Error(w, "Internal Server Error", http.StatusInternalServerError)
return
}
// Parsing the template file with the custom function map cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "image"}
combinedResults := getImageResultsFromCacheOrFetch(cacheKey, query, safe, lang, page)
elapsedTime := time.Since(startTime)
tmpl, err := template.New("images.html").Funcs(funcs).ParseFiles("templates/images.html") tmpl, err := template.New("images.html").Funcs(funcs).ParseFiles("templates/images.html")
if err != nil { if err != nil {
log.Printf("Error parsing template: %v", err) log.Printf("Error parsing template: %v", err)
@ -141,14 +128,14 @@ func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int
HasPrevPage bool HasPrevPage bool
HasNextPage bool HasNextPage bool
}{ }{
Results: results, Results: combinedResults,
Query: query, Query: query,
Page: page, Page: page,
Fetched: fmt.Sprintf("%.2f seconds", time.Since(time.Now()).Seconds()), Fetched: fmt.Sprintf("%.2f seconds", elapsedTime.Seconds()),
LanguageOptions: languageOptions, LanguageOptions: languageOptions,
CurrentLang: lang, CurrentLang: lang,
HasPrevPage: page > 1, HasPrevPage: page > 1,
HasNextPage: len(results) >= 50, HasNextPage: len(combinedResults) >= 50,
} }
err = tmpl.Execute(w, data) err = tmpl.Execute(w, data)
@ -157,3 +144,50 @@ func handleImageSearch(w http.ResponseWriter, query, safe, lang string, page int
http.Error(w, "Internal Server Error", http.StatusInternalServerError) http.Error(w, "Internal Server Error", http.StatusInternalServerError)
} }
} }
func getImageResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page int) []ImageSearchResult {
cacheChan := make(chan []SearchResult)
var combinedResults []ImageSearchResult
go func() {
results, exists := resultsCache.Get(cacheKey)
if exists {
log.Println("Cache hit")
cacheChan <- results
} else {
log.Println("Cache miss")
cacheChan <- nil
}
}()
select {
case results := <-cacheChan:
if results == nil {
combinedResults = fetchAndCacheImageResults(query, safe, lang, page)
} else {
_, _, imageResults := convertToSpecificResults(results)
combinedResults = imageResults
}
case <-time.After(2 * time.Second):
log.Println("Cache check timeout")
combinedResults = fetchAndCacheImageResults(query, safe, lang, page)
}
return combinedResults
}
func fetchAndCacheImageResults(query, safe, lang string, page int) []ImageSearchResult {
results, err := fetchImageResults(query, safe, lang, page)
if err != nil || len(results) == 0 {
log.Printf("Error fetching image results: %v", err)
return []ImageSearchResult{
{Title: "Results are currently unavailable, sorry. Please try again later."},
}
}
// Cache the valid results
cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "image"}
resultsCache.Set(cacheKey, convertToSearchResults(results))
return results
}

45
text.go
View file

@ -12,8 +12,7 @@ import (
) )
var ( var (
debugMode bool debugMode bool
resultsCache = NewResultsCache(6 * time.Hour) // Cache with 6-hour expiration
) )
func init() { func init() {
@ -25,8 +24,8 @@ func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int)
startTime := time.Now() startTime := time.Now()
const resultsPerPage = 10 const resultsPerPage = 10
cacheKey := CacheKey{Query: query, Page: page, Safe: safe, Lang: lang} cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
combinedResults := getResultsFromCacheOrFetch(cacheKey, query, safe, lang, page, resultsPerPage) combinedResults := getTextResultsFromCacheOrFetch(cacheKey, query, safe, lang, page, resultsPerPage)
hasPrevPage := page > 1 hasPrevPage := page > 1
hasNextPage := len(combinedResults) == resultsPerPage hasNextPage := len(combinedResults) == resultsPerPage
@ -39,8 +38,8 @@ func HandleTextSearch(w http.ResponseWriter, query, safe, lang string, page int)
} }
} }
func getResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page, resultsPerPage int) []TextSearchResult { func getTextResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, page, resultsPerPage int) []TextSearchResult {
cacheChan := make(chan []TextSearchResult) cacheChan := make(chan []SearchResult)
var combinedResults []TextSearchResult var combinedResults []TextSearchResult
go func() { go func() {
@ -55,48 +54,52 @@ func getResultsFromCacheOrFetch(cacheKey CacheKey, query, safe, lang string, pag
}() }()
select { select {
case combinedResults = <-cacheChan: case results := <-cacheChan:
if combinedResults == nil { if results == nil {
combinedResults = fetchResultsUntilFull(query, safe, lang, page, resultsPerPage) combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
resultsCache.Set(cacheKey, combinedResults) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
} else {
textResults, _, _ := convertToSpecificResults(results)
combinedResults = textResults
} }
case <-time.After(2 * time.Second): case <-time.After(2 * time.Second):
log.Println("Cache check timeout") log.Println("Cache check timeout")
combinedResults = fetchResultsUntilFull(query, safe, lang, page, resultsPerPage) combinedResults = fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
resultsCache.Set(cacheKey, combinedResults) resultsCache.Set(cacheKey, convertToSearchResults(combinedResults))
} }
return combinedResults return combinedResults
} }
func cacheNextPageIfNotCached(query, safe, lang string, page, resultsPerPage int) { func cacheNextPageIfNotCached(query, safe, lang string, page, resultsPerPage int) {
cacheKey := CacheKey{Query: query, Page: page, Safe: safe, Lang: lang} cacheKey := CacheKey{Query: query, Page: page, Safe: safe == "true", Lang: lang, Type: "text"}
if _, exists := resultsCache.Get(cacheKey); !exists { if _, exists := resultsCache.Get(cacheKey); !exists {
log.Printf("Next page %d not cached, caching now...", page) log.Printf("Next page %d not cached, caching now...", page)
nextPageResults := fetchResultsUntilFull(query, safe, lang, page, resultsPerPage) nextPageResults := fetchTextResultsUntilFull(query, safe, lang, page, resultsPerPage)
resultsCache.Set(cacheKey, nextPageResults) resultsCache.Set(cacheKey, convertToSearchResults(nextPageResults))
} else { } else {
log.Printf("Next page %d already cached", page) log.Printf("Next page %d already cached", page)
} }
} }
func fetchResultsUntilFull(query, safe, lang string, targetPage, resultsPerPage int) []TextSearchResult { func fetchTextResultsUntilFull(query, safe, lang string, targetPage, resultsPerPage int) []TextSearchResult {
var combinedResults []TextSearchResult var combinedResults []TextSearchResult
currentPage := 1 currentPage := 1
resultsNeeded := targetPage * resultsPerPage resultsNeeded := targetPage * resultsPerPage
for len(combinedResults) < resultsNeeded { for len(combinedResults) < resultsNeeded {
cacheKey := CacheKey{Query: query, Page: currentPage, Safe: safe, Lang: lang} cacheKey := CacheKey{Query: query, Page: targetPage, Safe: safe == "true", Lang: lang, Type: "text"}
cachedResults, exists := resultsCache.Get(cacheKey) cachedResults, exists := resultsCache.Get(cacheKey)
if exists { if exists {
combinedResults = append(combinedResults, cachedResults...) textResults, _, _ := convertToSpecificResults(cachedResults)
combinedResults = append(combinedResults, textResults...)
} else { } else {
results := fetchAndCacheResults(query, safe, lang, currentPage, resultsPerPage) results := fetchAndCacheTextResults(query, safe, lang, currentPage, resultsPerPage)
if len(results) == 0 { if len(results) == 0 {
break break
} }
combinedResults = append(combinedResults, results...) combinedResults = append(combinedResults, results...)
resultsCache.Set(cacheKey, results) resultsCache.Set(cacheKey, convertToSearchResults(results))
} }
currentPage++ currentPage++
@ -120,7 +123,7 @@ func fetchResultsUntilFull(query, safe, lang string, targetPage, resultsPerPage
return combinedResults[startIndex:endIndex] return combinedResults[startIndex:endIndex]
} }
func fetchAndCacheResults(query, safe, lang string, page, resultsPerPage int) []TextSearchResult { func fetchAndCacheTextResults(query, safe, lang string, page, resultsPerPage int) []TextSearchResult {
var combinedResults []TextSearchResult var combinedResults []TextSearchResult
var wg sync.WaitGroup var wg sync.WaitGroup
var mu sync.Mutex var mu sync.Mutex

View file

@ -32,18 +32,6 @@ var (
mu sync.Mutex mu sync.Mutex
) )
// VideoResult reflects the structured data for a video result
type VideoResult struct {
Href string
Title string
Date string
Views string
Creator string
Publisher string
Image string
Duration string
}
// VideoAPIResponse matches the structure of the JSON response from the Piped API // VideoAPIResponse matches the structure of the JSON response from the Piped API
type VideoAPIResponse struct { type VideoAPIResponse struct {
Items []struct { Items []struct {
@ -99,11 +87,8 @@ func checkDisabledInstancesPeriodically() {
ticker := time.NewTicker(retryDuration) ticker := time.NewTicker(retryDuration)
defer ticker.Stop() defer ticker.Stop()
for { for range ticker.C {
select { checkAndReactivateInstances()
case <-ticker.C:
checkAndReactivateInstances()
}
} }
} }