diff --git a/app.js b/app.js index 5d6ec4d..2f65db1 100644 --- a/app.js +++ b/app.js @@ -6,26 +6,11 @@ const { GoogleGenerativeAI } = require('@google/generative-ai'); const axios = require('axios'); const rateLimit = require('express-rate-limit'); const validator = require('validator'); -const { exec } = require('child_process'); // Import child_process to execute Python scripts require('dotenv').config(); const app = express(); const PORT = 80; -// Run setup script to ensure Python and libraries are installed -exec('bash setup.sh', (error, stdout, stderr) => { - if (error) { - console.error(`Error executing setup script: ${error.message}`); - return; - } - if (stderr) { - console.error(`stderr: ${stderr}`); - return; - } - console.log(`stdout: ${stdout}`); -}); - - // Initialize Google Generative AI const genAI = new GoogleGenerativeAI(process.env.API_KEY); const model = genAI.getGenerativeModel({ model: "gemini-1.5-flash" }); @@ -49,7 +34,9 @@ app.get('/view', (req, res) => { res.sendFile(path.join(__dirname, 'views/view.html')); }); -// Serve snake game + + +// Serve homepage app.get('/snake', (req, res) => { res.sendFile(path.join(__dirname, 'views/snake.html')); }); @@ -81,28 +68,70 @@ const deleteArticlesFolder = () => { // Schedule the deleteArticlesFolder function to run every 24 hours setInterval(deleteArticlesFolder, 24 * 60 * 60 * 1000); // 24 hours in milliseconds -// Function to scrape search results using scrape.py -const scrapePySearch = (query) => { - return new Promise((resolve, reject) => { - const sanitizedQuery = query.replace(/[^a-zA-Z0-9 ]/g, ''); // sanitize query for shell - exec(`python3 scrape.py "${sanitizedQuery}" 10`, (error, stdout, stderr) => { // Limit to 10 results - if (error) { - console.error(`Error executing Python script: ${error.message}`); - reject(error); - } - if (stderr) { - console.error(`stderr from Python script: ${stderr}`); - } - - try { - const results = JSON.parse(stdout); - resolve(results); - } catch (parseError) { - console.error(`Error parsing Python script output: ${parseError.message}`); - reject(parseError); - } - }); - }); +// Function to sanitize scraped data +const sanitizeScrapedData = (text) => { + return text.replace(/[\n\r]/g, ' ').trim(); // Remove newlines, trim whitespace +}; + +// Function to scrape search results from SerpAPI +const scrapeSerpApiSearch = async (query) => { + if (searchCache.has(query)) { + console.log("Serving from cache"); + return searchCache.get(query); + } + + const apiKey = process.env.SERPAPI_API_KEY; + const formattedQuery = encodeURIComponent(query); + const url = `https://serpapi.com/search.json?q=${formattedQuery}&api_key=${apiKey}`; + + try { + const { data } = await axios.get(url); + + if (!data.organic_results || !Array.isArray(data.organic_results)) { + console.error("No organic results found in the response."); + return []; + } + + const links = data.organic_results.map(result => result.link).filter(link => link && link.startsWith('http')); + console.log("Collected URLs:", links); + + // Cache the result for 24 hours + searchCache.set(query, links); + setTimeout(() => searchCache.delete(query), 24 * 60 * 60 * 1000); + + return links; + } catch (error) { + console.error("Error scraping SerpAPI:", error); + return []; + } +}; + +// Function to scrape images from SerpAPI +const scrapeSerpApiImages = async (query) => { + if (searchCache.has(query)) { + console.log("Serving images from cache"); + return searchCache.get(query); + } + + const apiKey = process.env.SERPAPI_API_KEY; + const url = `https://serpapi.com/search.json?engine=google_images&q=${query}&api_key=${apiKey}`; + + try { + const { data } = await axios.get(url); + const images = data.images_results.slice(0, 10).map(img => ({ + thumbnail: img.thumbnail, + original: img.original + })); + + // Cache the result for 24 hours + searchCache.set(query, images); + setTimeout(() => searchCache.delete(query), 24 * 60 * 60 * 1000); + + return images; + } catch (error) { + console.error("Error scraping SerpAPI images:", error); + return []; + } }; // Rate limiter to prevent too many requests @@ -126,12 +155,11 @@ app.post('/search', limiter, async (req, res) => { } try { - // Fetch results from scrape.py - const lookupResult = await scrapePySearch(query); + const lookupResult = await scrapeSerpApiSearch(query); console.log("Scraped URLs:", lookupResult); if (!Array.isArray(lookupResult) || lookupResult.length === 0) { - const errorMsg = "No results found. Please try a different query."; + const errorMsg = "No results found from SerpAPI. Please try a different query."; const articleHtml = fs.readFileSync(path.join(__dirname, 'views/template.html'), 'utf8') .replace(/{{title}}/g, query) .replace(/{{content}}/g, "No content generated as there were no URLs.") @@ -150,33 +178,70 @@ app.post('/search', limiter, async (req, res) => { articleHtml = articleHtml.replace(/{{title}}/g, query); articleHtml = articleHtml.replace(/{{content}}/g, markdownContent); - const urlList = lookupResult.map(url => `
No images available
'; + + articleHtml = articleHtml.replace(/{{imageGallery}}/g, imageGallery); - fs.writeFileSync(filePath, articleHtml); - res.sendFile(filePath); + // Save the generated HTML file + fs.writeFileSync(filePath, articleHtml); + res.sendFile(filePath); + } catch (imageError) { + console.error("Error generating the image gallery:", imageError); + res.status(500).send("Error generating the image gallery."); + } } catch (error) { - console.error("Error during the article generation process:", error.message); + console.error("Error generating the article:", error); res.status(500).send("An unexpected error occurred: " + error.message); } }); -// Start the server + app.listen(PORT, () => { - console.log(`Server is running on http://localhost:${PORT}`); + console.log(`Server is running on port ${PORT}`); }); diff --git a/node_modules/.package-lock.json b/node_modules/.package-lock.json index b5acc24..1206339 100644 --- a/node_modules/.package-lock.json +++ b/node_modules/.package-lock.json @@ -155,12 +155,6 @@ "url": "https://github.com/sponsors/fb55" } }, - "node_modules/child_process": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/child_process/-/child_process-1.0.2.tgz", - "integrity": "sha512-Wmza/JzL0SiWz7kl6MhIKT5ceIlnFPJX+lwUGj7Clhy5MMldsSoJR0+uvRzOS5Kv45Mq7t1PoE8TsOA9bzvb6g==", - "license": "ISC" - }, "node_modules/combined-stream": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", diff --git a/node_modules/child_process/README.md b/node_modules/child_process/README.md deleted file mode 100644 index 5e9a74c..0000000 --- a/node_modules/child_process/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# Security holding package - -This package name is not currently in use, but was formerly occupied -by another package. To avoid malicious use, npm is hanging on to the -package name, but loosely, and we'll probably give it to you if you -want it. - -You may adopt this package by contacting support@npmjs.com and -requesting the name. diff --git a/node_modules/child_process/package.json b/node_modules/child_process/package.json deleted file mode 100644 index 50ba9be..0000000 --- a/node_modules/child_process/package.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "name": "child_process", - "version": "1.0.2", - "description": "", - "main": "index.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/npm/security-holder.git" - }, - "keywords": [], - "author": "", - "license": "ISC", - "bugs": { - "url": "https://github.com/npm/security-holder/issues" - }, - "homepage": "https://github.com/npm/security-holder#readme" -} diff --git a/package-lock.json b/package-lock.json index 748f2c4..e4e936c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,6 @@ "@google/generative-ai": "^0.19.0", "axios": "^1.7.7", "cheerio": "^1.0.0", - "child_process": "^1.0.2", "dotenv": "^16.4.5", "express": "^4.21.0", "express-rate-limit": "^7.4.0", @@ -172,12 +171,6 @@ "url": "https://github.com/sponsors/fb55" } }, - "node_modules/child_process": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/child_process/-/child_process-1.0.2.tgz", - "integrity": "sha512-Wmza/JzL0SiWz7kl6MhIKT5ceIlnFPJX+lwUGj7Clhy5MMldsSoJR0+uvRzOS5Kv45Mq7t1PoE8TsOA9bzvb6g==", - "license": "ISC" - }, "node_modules/combined-stream": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", diff --git a/package.json b/package.json index d5a8e31..d047c27 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,6 @@ "@google/generative-ai": "^0.19.0", "axios": "^1.7.7", "cheerio": "^1.0.0", - "child_process": "^1.0.2", "dotenv": "^16.4.5", "express": "^4.21.0", "express-rate-limit": "^7.4.0", diff --git a/scrape.py b/scrape.py deleted file mode 100644 index 33f79cd..0000000 --- a/scrape.py +++ /dev/null @@ -1,77 +0,0 @@ -import json -import sys -import requests -from bs4 import BeautifulSoup - -def search_google(query, num_results): - query = query.replace(" ", "+") - url = f"https://www.google.com/search?q={query}&num={num_results}" - headers = { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36" - } - response = requests.get(url, headers=headers) - - if response.status_code != 200: - raise Exception("Failed to fetch the search results. Status Code: {}".format(response.status_code)) - - soup = BeautifulSoup(response.text, 'html.parser') - search_results = [] - - for result in soup.select(".tF2Cxc"): - title = result.select_one(".DKV0Md").text if result.select_one(".DKV0Md") else None - link = result.select_one("a")["href"] if result.select_one("a") else None - snippet = result.select_one(".aCOpRe").text if result.select_one(".aCOpRe") else None - - if title and link: - search_results.append({ - "title": title, - "link": link, - "snippet": snippet - }) - - return search_results[:num_results] - -def search_google_images(query, num_results): - query = query.replace(" ", "+") - url = f"https://www.google.com/search?hl=en&tbm=isch&q={query}&num={num_results}" - headers = { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36" - } - response = requests.get(url, headers=headers) - - if response.status_code != 200: - raise Exception("Failed to fetch the image search results. Status Code: {}".format(response.status_code)) - - soup = BeautifulSoup(response.text, 'html.parser') - image_results = [] - - for result in soup.find_all('img'): - img_src = result['src'] - if img_src and len(image_results) < num_results: - image_results.append({"link": img_src}) - - return image_results - -def main(): - if len(sys.argv) < 3: - print("Usage: python3 scrape.py