Search/images-imgur.go

150 lines
3.9 KiB
Go
Raw Permalink Normal View History

2024-06-10 09:49:40 +00:00
package main
import (
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
"time"
2024-06-10 09:49:40 +00:00
"github.com/PuerkitoBio/goquery"
)
// PerformImgurImageSearch performs an image search on Imgur and returns the results
func PerformImgurImageSearch(query, safe, lang string, page int) ([]ImageSearchResult, time.Duration, error) {
startTime := time.Now() // Start the timer
2024-06-10 09:49:40 +00:00
var results []ImageSearchResult
searchURL := buildImgurSearchURL(query, page)
resp, err := http.Get(searchURL)
if err != nil {
return nil, 0, fmt.Errorf("making request: %v", err)
2024-06-10 09:49:40 +00:00
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, 0, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
2024-06-10 09:49:40 +00:00
}
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return nil, 0, fmt.Errorf("loading HTML document: %v", err)
2024-06-10 09:49:40 +00:00
}
doc.Find("div.cards div.post").Each(func(i int, s *goquery.Selection) {
thumbnailSrc, exists := s.Find("a img").Attr("src")
if !exists || len(thumbnailSrc) < 25 {
return
}
imgSrc := strings.Replace(thumbnailSrc, "b.", ".", 1)
// Ensure the URLs have the correct protocol
if !strings.HasPrefix(thumbnailSrc, "http") {
thumbnailSrc = "https:" + thumbnailSrc
}
if !strings.HasPrefix(imgSrc, "http") {
imgSrc = "https:" + imgSrc
}
urlPath, exists := s.Find("a").Attr("href")
if !exists {
return
}
// Scrape the image directly from the Imgur page
imgSrc = scrapeImageFromImgurPage("https://imgur.com" + urlPath)
// Remove any query parameters from the URL
imgSrc = removeQueryParameters(imgSrc)
title, _ := s.Find("a img").Attr("alt")
width, _ := strconv.Atoi(s.Find("a img").AttrOr("width", "0"))
height, _ := strconv.Atoi(s.Find("a img").AttrOr("height", "0"))
results = append(results, ImageSearchResult{
Thumbnail: thumbnailSrc,
Title: strings.TrimSpace(title),
Media: imgSrc,
Width: width,
Height: height,
Source: "https://imgur.com" + urlPath,
ThumbProxy: imgSrc, //"/img_proxy?url=" + url.QueryEscape(imgSrc)
})
})
duration := time.Since(startTime) // Calculate the duration
return results, duration, nil
2024-06-10 09:49:40 +00:00
}
// scrapeImageFromImgurPage fetches pageURL and returns the image URL found in
// the content attribute of the page's og:image meta tag.
//
// It returns "" when the request fails, the status is not 200, the HTML cannot
// be parsed, or the tag is missing or has empty content. Each failure is
// reported on stdout rather than returned, so callers only need to check for
// the empty string.
func scrapeImageFromImgurPage(pageURL string) string {
	// The default HTTP client never times out; bound the request so a single
	// stalled page cannot block the caller forever.
	client := &http.Client{Timeout: 10 * time.Second}

	resp, err := client.Get(pageURL)
	if err != nil {
		fmt.Printf("Error fetching page: %v\n", err)
		return ""
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Printf("Unexpected status code: %d\n", resp.StatusCode)
		return ""
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		fmt.Printf("Error loading HTML document: %v\n", err)
		return ""
	}

	imgSrc, exists := doc.Find("meta[property='og:image']").Attr("content")
	imgSrc = strings.TrimSpace(imgSrc)
	// An empty content attribute must count as "not found"; otherwise the
	// protocol fix-up below would turn it into the bogus URL "https:".
	if !exists || imgSrc == "" {
		fmt.Printf("Image not found on page: %s\n", pageURL)
		return ""
	}

	// Ensure the URL has the correct protocol
	if !strings.HasPrefix(imgSrc, "http") {
		imgSrc = "https:" + imgSrc
	}
	return imgSrc
}
// removeQueryParameters returns rawURL with its query string removed. Every
// other component (scheme, host, path, fragment) is preserved.
//
// If rawURL cannot be parsed, the error is printed to stdout and rawURL is
// returned unchanged.
func removeQueryParameters(rawURL string) string {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		fmt.Printf("Error parsing URL: %v\n", err)
		return rawURL
	}
	parsedURL.RawQuery = ""
	// A URL ending in a bare "?" parses with ForceQuery set, which would make
	// String() emit the trailing "?" again even though the query is empty.
	parsedURL.ForceQuery = false
	return parsedURL.String()
}
// buildImgurSearchURL returns the Imgur search URL for query, sorted by score
// over all time, with the "thumbs" result style.
//
// page is 1-based; Imgur's "p" parameter is 0-based, so page-1 is sent. Pages
// below 1 are clamped to the first page instead of producing a negative index.
func buildImgurSearchURL(query string, page int) string {
	const baseURL = "https://imgur.com/search/score/all"

	pageIndex := page - 1
	if pageIndex < 0 {
		pageIndex = 0
	}

	params := url.Values{}
	params.Add("q", query)
	params.Add("qs", "thumbs")
	params.Add("p", strconv.Itoa(pageIndex))
	return baseURL + "?" + params.Encode()
}
// Example usage:
//
// func main() {
// results, duration, err := PerformImgurImageSearch("cats", "true", "en", 1)
// if err != nil {
// fmt.Println("Error:", err)
// return
// }
// fmt.Printf("Search took: %v\n", duration)
// for _, result := range results {
// fmt.Printf("Title: %s\nSource: %s\nMedia: %s\nThumbnail: %s\nThumbProxy: %s\nWidth: %d\nHeight: %d\n\n",
// result.Title, result.Source, result.Media, result.Thumbnail, result.ThumbProxy, result.Width, result.Height)
// }
// }