Feature 35: Integrate database storage with analysis script #97

Closed
85 changes: 24 additions & 61 deletions app.py
@@ -1,12 +1,9 @@
-from flask import Flask, render_template, redirect, url_for, request, session, flash, jsonify
-from urllib.request import urlopen
-from bs4 import BeautifulSoup
-import requests
-import csv
-import json
-import os
+from flask import Flask, render_template, redirect, url_for, request, flash, jsonify
+from scraping import scrape_website, save_to_csv, clear_csv_file
+from database import store_analysis_results_in_database

 app = Flask(__name__)
 app.secret_key = 'your_secret_key'  # Setting a secret key for flash messages
Contributor:

What is this secret for?

Collaborator (Author):

> What is this secret for?

Greetings @stephane-segning. Concerning the secret key above, I have been trying to figure out where it came from, and I want to let you know that my team and I did not add any key to app.py.

Contributor:

Then just remove it.
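
(Editor's note: the new route still calls flash(), and Flask's flash() stores messages in the session, which requires a secret key to be set; deleting the line outright would make every flash() call raise a RuntimeError. A minimal sketch of one common alternative, loading the key from an environment variable — the SECRET_KEY variable name and the fallback behaviour are assumptions, not part of this PR:)

```python
import os

from flask import Flask

app = Flask(__name__)

# flash() signs its messages into the session cookie, so a secret key is
# required. Reading it from the environment keeps it out of source control.
# SECRET_KEY is an assumed variable name; the random fallback is only for
# local development (sessions won't survive a restart with it).
app.secret_key = os.environ.get("SECRET_KEY") or os.urandom(24).hex()
```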


 @app.route("/")
 def index():

@@ -16,7 +13,7 @@ def index():
 def input():
     return render_template("input.html")

-#setting up backend to receive urls
+# Setting up backend to receive urls
 @app.route('/save_url', methods=['POST'])
 def scrape():
     urls = request.form.getlist('urls')

@@ -36,65 +33,31 @@ def validate_urls(urls):

     return validated_urls


 @app.route("/results")
 def results():
     return render_template("results.html")

 # Modified routing based on input fields from the user involving images or number of text
 @app.route('/scrape', methods=['POST'])
-def scrape():
-    url = request.form.get('url')
+def scrape_data():
+    urls = request.form.get('urls')
     depth = int(request.form.get('depth', 1))
     data_to_look_for = request.form.get('data_to_look_for', '')

-    # Scrape data from the provided URL
-    scraped_data = scrape_data(url, depth, data_to_look_for)
+    url_list = [url.strip() for url in urls.split('\n') if url.strip()]
+
+    scraped_data = []
+
+    for url in url_list:
+        data = scrape_website(url, depth)
+        scraped_data.append(data)

     # Save scraped data to CSV and JSON files
     save_to_csv(scraped_data)
-    save_to_json(scraped_data)
-
-    return render_template('results.html', data=scraped_data)
-
-def scrape_data(url, depth, data_to_look_for):
-    try:
-        response = requests.get(url)
-        response.raise_for_status()  # Raise an HTTPError for bad responses
-        soup = BeautifulSoup(response.text, 'html.parser')
-
-        # Implementing logic to extract relevant data from the BeautifulSoup object
-        scraped_data = extract_data(soup, depth, data_to_look_for)
-        return scraped_data
-    except requests.exceptions.RequestException as e:
-        print(f"Error during scraping: {e}")
-        return None
-
-def extract_data(soup, depth, data_to_look_for):
-    # Implementing logic to extract data here
-    paragraphs = soup.find_all('p')
-    scraped_data = [p.text.strip() for p in paragraphs]
-    return scraped_data[:depth]
-
-def save_to_csv(data):
-    directory = 'scraped_data'
-    os.makedirs(directory, exist_ok=True)  # Create the directory if it doesn't exist
-
-    with open(os.path.join(directory, 'scraped_data.csv'), 'w', newline='', encoding='utf-8') as csvfile:
-        writer = csv.writer(csvfile)
-        writer.writerow(['Data'])
-        for item in data:
-            writer.writerow([item])
-
-def save_to_json(data):
-    directory = 'scraped_data'
-    os.makedirs(directory, exist_ok=True)  # Create the directory if it doesn't exist
-
-    with open(os.path.join(directory, 'scraped_data.json'), 'w', encoding='utf-8') as jsonfile:
-        json.dump(data, jsonfile, ensure_ascii=False, indent=2)
-
-@app.route('/api/start-analysis', methods = ['GET', 'POST'])
-def analysis():
-    return render_template('test.html')
+
+    # Integrating database functionality with the analysis script
+    store_analysis_results_in_database(scraped_data)
+
+    flash('Scraping and saving to database successful!', 'success')
+
+    return render_template('results.html', data=scraped_data)

 if __name__ == '__main__':
-    app.run(debug=True)
+    clear_csv_file()  # Clear the CSV file before running the application
+    app.run(debug=True)
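
The scraping and database modules that app.py now imports from are not shown in this diff. For orientation, here is a minimal sketch of what their interfaces could look like, inferred only from the call sites in app.py; every signature, path, and field below is an assumption, not code from this PR.

```python
# scraping.py -- hypothetical sketch; only the three imported names come
# from app.py, everything else is assumed.
import csv
import os

import requests
from bs4 import BeautifulSoup

CSV_PATH = os.path.join('scraped_data', 'scraped_data.csv')  # assumed location

def scrape_website(url, depth):
    """Fetch one page and return up to `depth` paragraph texts."""
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    return [p.get_text(strip=True) for p in soup.find_all('p')][:depth]

def save_to_csv(results):
    # app.py passes a list with one entry per URL; write one row per entry.
    os.makedirs(os.path.dirname(CSV_PATH), exist_ok=True)
    with open(CSV_PATH, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows([item] for item in results)

def clear_csv_file():
    os.makedirs(os.path.dirname(CSV_PATH), exist_ok=True)
    open(CSV_PATH, 'w', encoding='utf-8').close()  # truncate the file
```

And a matching sketch for the database side, using sqlite3 from the standard library (the schema and file name are invented for illustration):

```python
# database.py -- hypothetical sketch; store_analysis_results_in_database is
# the only name taken from app.py.
import json
import sqlite3

def store_analysis_results_in_database(results, db_path='analysis.db'):
    conn = sqlite3.connect(db_path)
    conn.execute(
        'CREATE TABLE IF NOT EXISTS analysis_results ('
        'id INTEGER PRIMARY KEY AUTOINCREMENT, payload TEXT)'
    )
    conn.executemany(
        'INSERT INTO analysis_results (payload) VALUES (?)',
        [(json.dumps(item),) for item in results],
    )
    conn.commit()
    conn.close()
```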
