-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAPICalls.py
84 lines (61 loc) · 2.75 KB
/
APICalls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
'''
This script grabs movie metadata for a given year from the New York Times Movie Reviews API and the OMDB API.
DEPENDENCIES: a config.py file that defines the New York Times API key as nyt_key and the OMDB API key as omdb_key
OUTPUT: data/NYTData{year}.csv, data/OMDBData{year}.csv
'''
import json
import os
import time

import pandas as pd
import requests

from NYTCriticsChoicePrediction import config
def get_nyt_movies(key, year):
    """Download all NYT movie reviews published in ``year`` and save them as CSV.

    Pages through the New York Times movie-review search endpoint, advancing
    the ``offset`` query parameter by 20 per request, until the response's
    ``has_more`` flag is false.

    Args:
        key: New York Times API key.
        year: Publication year; queried as ``{year}-01-01:{year}-12-31``.

    Returns:
        A ``(nyt_data, titles)`` tuple: the list of raw review dictionaries
        returned by the API and the list of their ``display_title`` values,
        in the same order.

    Raises:
        requests.HTTPError: If the API answers with an error status, instead
            of failing later with an unrelated ``KeyError``.

    Side effects:
        Writes ``data/NYTData{year}.csv``, creating ``data/`` if it is missing.
    """
    search_url = 'https://api.nytimes.com/svc/movies/v2/reviews/search.json'
    page_step = 20  # amount the offset advances per request

    nyt_data = []  # all movie data from NYT API return
    offset = 0
    has_more = True  # whether another request is required to get all results
    while has_more:
        params = {
            "offset": offset,
            "publication-date": f"{year}-01-01:{year}-12-31",
            "api-key": key,
        }
        # A timeout keeps a stalled connection from hanging the script forever.
        r = requests.get(search_url, params=params, timeout=30)
        # Surface HTTP errors (bad key, rate limiting, ...) explicitly.
        r.raise_for_status()
        data = r.json()

        # Guard against a missing or null 'results' field so that an empty
        # page does not raise a TypeError in extend().
        nyt_data.extend(data.get('results') or [])
        has_more = bool(data.get('has_more'))
        offset += page_step

        # ensure the request per minute limit is not reached
        time.sleep(6)

    # list of only titles, used as query for OMDB data
    titles = [review['display_title'] for review in nyt_data]

    # convert to dataframe and write to CSV
    os.makedirs("data", exist_ok=True)
    pd.DataFrame(nyt_data).to_csv(f"data/NYTData{year}.csv")
    return nyt_data, titles
def _fetch_omdb_record(base_url, params):
    """Send one OMDB query and return the decoded JSON object ({} if unusable)."""
    r = requests.get(base_url, params=params, timeout=30)
    try:
        data = json.loads(r.text)
    except ValueError:
        # A non-JSON body is treated the same as "movie not found".
        return {}
    return data if isinstance(data, dict) else {}


def get_details_from_omdb(key, year, titles):
    """Look up each title on OMDB and save the matches as CSV.

    Every title is first queried together with ``year``. If OMDB does not
    report a match, the query is repeated without the year, because the movie
    may be stored in OMDB under a different year. Titles that still do not
    match are skipped.

    Args:
        key: OMDB API key.
        year: Release year sent with the first query for each title.
        titles: Iterable of movie titles to look up.

    Returns:
        A list of the OMDB response dictionaries whose ``"Response"`` field
        is ``"True"``, in the order the titles were given.

    Side effects:
        Writes ``data/OMDBData{year}.csv``, creating ``data/`` if it is missing.
    """
    # HTTPS so the API key is not sent in clear text; built once, not per title.
    omdb_baseurl = "https://www.omdbapi.com/"

    omdb_data = []
    for title in titles:
        if not title:
            # An empty title cannot match anything; skip the two wasted calls.
            continue

        # request data for given title
        data = _fetch_omdb_record(
            omdb_baseurl,
            {"apikey": key, "t": title, "y": year, "plot": "full"},
        )

        # if the movie was not found, it may be in the OMDB system under a
        # different year: try taking out the year parameter
        if data.get("Response") != "True":
            data = _fetch_omdb_record(
                omdb_baseurl,
                {"apikey": key, "t": title, "plot": "full"},
            )

        # if the OMDB finds a matching movie, add the dictionary to a list
        if data.get("Response") == "True":
            omdb_data.append(data)

    # convert to dataframe and write to CSV
    os.makedirs("data", exist_ok=True)
    pd.DataFrame(omdb_data).to_csv(f"data/OMDBData{year}.csv")
    return omdb_data
if __name__ == '__main__':
    # Year of reviews to collect -- change it here to pull a different year.
    year = 2020

    # API credentials come from the project's config module.
    nyt_key, omdb_key = config.nyt_key, config.omdb_key

    # Fetch the NYT reviews first: their titles are the OMDB search terms.
    nyt_results, movie_titles = get_nyt_movies(nyt_key, year)
    omdb_results = get_details_from_omdb(omdb_key, year, movie_titles)