-
Notifications
You must be signed in to change notification settings - Fork 0
/
population.py
129 lines (101 loc) · 3.9 KB
/
population.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import menu
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from bs4 import BeautifulSoup
import sqlite3
import pandas as pd
import streamlit as st
# method to return states from region
def locationString(location):
conn = sqlite3.connect('tang.db', detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
cur = conn.cursor()
# check if a region and get states
if (location == "West" or location == "South" or location == "Northeast" or location == "Midwest"):
# Query to retrieve the states in the specified region
sql_query9 = """
SELECT Name
FROM states
WHERE Region = ?
"""
# Execute the SQL query with parameters
cur.execute(sql_query9, (location,))
region = cur.fetchall()
# List of state names in the specified region
states = [row[0] for row in region]
# Convert the list of state names
state_names_str = ', '.join([f"'{state}'" for state in states])
return state_names_str
# Close the connection
conn.close()
else:
return f"'{location}'"
# method to get population data and make graph
def avgPop(location1, location2, yearStart = None, yearEnd = None):
conn = sqlite3.connect('tang.db', detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
cur = conn.cursor()
# get states
state_names_str1 = locationString(location1)
state_names_str2 = locationString(location2)
# set conditional
year_condition = ""
if yearStart is not None and yearEnd is not None:
year_condition = f"AND Year >= {yearStart} AND Year <= {yearEnd}"
# query data
sql_query2 = f"""
SELECT o.Year, AVG(population) AS avg_population
FROM popData o
JOIN states s ON o.Region = s.Name
WHERE s.Name IN ({state_names_str1}) {year_condition}
GROUP BY o.Year
"""
# Execute the SQL query and load the results into a DataFrame
data2 = pd.read_sql_query(sql_query2, conn)
# query data
sql_query3 = f"""
SELECT o.Year, AVG(population) AS avg_population
FROM popData o
JOIN states s ON o.Region = s.Name
WHERE s.Name IN ({state_names_str2}) {year_condition}
GROUP BY o.Year
"""
# Execute the SQL query and load the results into a DataFrame
data3 = pd.read_sql_query(sql_query3, conn)
# Close the connection
conn.close()
# Plot the data
fig = plt.figure()
plt.plot(data2['Year'], data2['avg_population'], label=location1)
plt.plot(data3['Year'], data3['avg_population'], label=location2)
# Add labels and title
plt.xlabel('Year')
plt.ylabel('Population')
plt.title(f"Population in {location1} vs {location2} from {yearStart} to {yearEnd}")
# Add legend
plt.legend()
plt.tight_layout() # Adjust layout to prevent clipping of x-axis labels
st.pyplot(fig)
# method to get population data of one region and multiple years and graph
def singlePop(location1, yearStart = None, yearEnd = None):
conn = sqlite3.connect('tang.db', detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
cur = conn.cursor()
# get state names from region
state_names_str1 = locationString(location1)
# get year condition
year_condition = ""
if yearStart is not None and yearEnd is not None:
year_condition = f"AND Year >= {yearStart} AND Year <= {yearEnd}"
# query data
sql_query2 = f"""
SELECT o.Year, AVG(population) AS avg_population
FROM popData o
JOIN states s ON o.Region = s.Name
WHERE s.Name IN ({state_names_str1}) {year_condition}
GROUP BY o.Year
"""
# Execute the SQL query
data2 = pd.read_sql_query(sql_query2, conn)
data2['Year'] = data2['Year'].astype(int)
# Close the connection
conn.close()
return(data2)