-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
114 lines (94 loc) · 3.92 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from bs4 import BeautifulSoup
import requests, lyricsgenius, json, os, sys
# URL template for the playback.fm country chart of a given year; fill it in
# with TOP100LIST.format(year=...).
TOP100LIST = "https://playback.fm/charts/country/{year}"
# Genius API tokens; one lyricsgenius client is created per token.
# NOTE(review): these are live credentials committed in source — consider
# loading them from the environment and rotating the tokens.
GENIUS_API_KEYS = ["4BS3k-97upk3p39IZNzEptiIjYEd7QTAsUxTgI9n_BdPxg9dwMA-NWsRcO9Nt7aY",
"uzMkjkngNmN143Gh3mznBRUSibemBP3zj-XyahSnZ1ACPgi6mFJVu0Yyw8n5vIus",
"GdyhzUKPIwLzZ2TeBD4OKzUfQCwhXH2CDvan-vMpOLlnH6t0dL4yc78-w8zvejZK"]
def get_year_songs(year, max_songs=100):
    """Scrape the playback.fm country chart for one year.

    Args:
        year: Value substituted into the ``TOP100LIST`` URL template.
        max_songs: Upper bound on the number of entries returned.

    Returns:
        A list of ``(song_title, artist)`` tuples in chart-table order,
        truncated to ``max_songs`` entries. The list is empty when the page
        contains no ``chartTbl`` table.

    Raises:
        requests.HTTPError: If the chart page answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = TOP100LIST.format(year=year)
    print(url)
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find("table", class_="chartTbl")
    if table is None:
        # No chart on the page: report "no songs" rather than an IndexError.
        return []

    songs = []
    for row in table.find_all("tr", itemprop="track"):
        artist_link = row.find("a", itemprop="byArtist")
        title_span = row.find("span", class_="song")
        if artist_link is None or title_span is None:
            # Skip malformed rows rather than aborting the whole chart.
            continue
        songs.append(
            (title_span.getText().strip(), artist_link.getText().strip())
        )
    return songs[0:max_songs]
def lyric_lookup(song, genius):
    """Fetch the lyrics for one chart entry from Genius.

    Args:
        song: A ``(title, artist)`` pair.
        genius: Client object exposing ``search_song(title=..., artist=...)``.

    Returns:
        The ``lyrics`` attribute of the search result, or ``""`` when the
        search returns nothing or the result carries no lyrics text.
        Errors raised by ``search_song`` itself propagate to the caller.
    """
    title, artist = song[0], song[1]
    result = genius.search_song(title=title, artist=artist)
    # A missing result (None) or a missing/None ``lyrics`` both mean "no
    # lyrics"; always hand back a string so callers can call str methods.
    return getattr(result, "lyrics", None) or ""
def year_analysis(year, max_songs, genius_wrapper):
    """Count word usage across one year's chart songs and save the tallies.

    Writes two JSON files, each ordered by ascending count:
    ``data/{year}_word_data.json`` holds the total occurrences of each word,
    and ``data/{year}_song_data.json`` holds the number of songs in which
    each word appears at least once.

    Args:
        year: Chart year passed to ``get_year_songs``.
        max_songs: Maximum number of chart entries to analyse.
        genius_wrapper: Genius client passed to ``lyric_lookup``.
    """
    # One translation table deletes every ignored punctuation character in a
    # single pass instead of ten chained replace() calls per song.
    strip_punct = str.maketrans("", "", ",.?!()\"{}'")
    word_counts = {}
    song_counts = {}
    for song in get_year_songs(year, max_songs=max_songs):
        # Treat a missing lyrics value as empty text.
        lyrics = (lyric_lookup(song, genius_wrapper) or "").lower()
        tokens = lyrics.translate(strip_punct).split()
        for word in tokens:
            word_counts[word] = word_counts.get(word, 0) + 1
        # dict.fromkeys de-duplicates in first-seen order, so each word is
        # credited once per song and the insertion order matches a scan.
        for word in dict.fromkeys(tokens):
            song_counts[word] = song_counts.get(word, 0) + 1

    # Create the output directory on first use instead of failing in open().
    os.makedirs("data", exist_ok=True)
    for counts, label in ((word_counts, "word"), (song_counts, "song")):
        ordered = dict(sorted(counts.items(), key=lambda item: item[1]))
        # The context manager guarantees the file is flushed and closed.
        with open(f"data/{year}_{label}_data.json", "w") as out:
            json.dump(ordered, out, indent=1)
def combine_all():
    """Merge the per-file tallies in ``./data`` into two combined files.

    JSON files whose name contains ``"word"`` are summed into
    ``data/combined_word_data.json``; every other JSON file is summed into
    ``data/combined_song_data.json``. Files whose name contains
    ``"combined"`` and files without a ``.json`` extension are ignored.
    Both outputs are ordered by ascending count.
    """
    words = {}
    songs = {}
    # Sorted listing: makes the order of equal-count entries reproducible
    # instead of depending on the directory enumeration order.
    for file in sorted(os.listdir("./data")):
        if "combined" in file or not file.endswith(".json"):
            # Skip previous outputs and stray non-JSON files.
            continue
        print(f"Combining {file}")
        with open(f"./data/{file}") as source:
            contents = json.load(source)
        target = words if "word" in file else songs
        for key, count in contents.items():
            target[key] = target.get(key, 0) + count

    for totals, label in ((words, "word"), (songs, "song")):
        ordered = dict(sorted(totals.items(), key=lambda item: item[1]))
        with open(f"data/combined_{label}_data.json", "w") as out:
            json.dump(ordered, out, indent=1)
def convert_to_csv(file, new_file):
    """Write the entries of a JSON object as ``key, value`` text lines.

    Args:
        file: Path of the JSON file to read; it must contain one object.
        new_file: Path of the output file, created or overwritten.
    """
    with open(file, encoding="utf-8") as source:
        counts = json.load(source)
    # The "key, value" layout (with the space) is the established output
    # format, so lines are written by hand rather than through csv.writer.
    # UTF-8 keeps non-ASCII words writable regardless of the locale.
    with open(new_file, "w", encoding="utf-8") as out:
        out.writelines(f"{key}, {value}\n" for key, value in counts.items())
def convert_all_to_csv():
    """Convert every JSON file in ``./data`` into a file in ``./csv_data``.

    Each output keeps the source file's base name with a ``.csv`` extension.
    Files without a ``.json`` extension are skipped.
    """
    # Create the destination on first use instead of failing in open().
    os.makedirs("./csv_data", exist_ok=True)
    for file in os.listdir("./data"):
        if not file.endswith(".json"):
            # Anything else cannot be parsed by the JSON-based converter.
            continue
        print(f"Converting {file} to csv")
        new_name = file.rsplit(".", 1)[0] + ".csv"
        convert_to_csv(f"./data/{file}", f"./csv_data/{new_name}")
def main(start, end, max_songs=10):
    """Run the yearly lyric analysis for every year in ``range(start, end)``.

    Args:
        start: First year to analyse.
        end: Stop year; it is excluded, as with ``range``.
        max_songs: Number of chart entries analysed per year.

    Raises:
        ValueError: If ``GENIUS_API_KEYS`` is empty.
    """
    wrappers = [
        lyricsgenius.Genius(key, remove_section_headers=True)
        for key in GENIUS_API_KEYS
    ]
    if not wrappers:
        raise ValueError("GENIUS_API_KEYS is empty; at least one key is required")
    for year in range(start, end):
        print("########################")
        print(f" Starting Year {year}")
        print("########################")
        # Rotate through the clients by year. Using len(wrappers) keeps the
        # index valid when keys are added to or removed from the list.
        client = wrappers[year % len(wrappers)]
        year_analysis(year, max_songs=max_songs, genius_wrapper=client)
    # Follow-up steps, currently disabled:
    #combine_all()
    #convert_all_to_csv()
#"https://genius.com/Roy-acuff-write-me-sweetheart-lyrics"
#https://genius.com/Jimmie-davis-is-it-too-late-now
if __name__ == '__main__':
    # Expects two integer arguments: the first year and the excluded stop year.
    if len(sys.argv) < 3:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} START_YEAR END_YEAR")
    main(int(sys.argv[1]), int(sys.argv[2]))