-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/age gap analysis #23
base: main
Are you sure you want to change the base?
Changes from 19 commits
e4601e6
b571425
c38d073
f6a0062
cf863ed
fa119b6
bab4e41
092bf4b
8ccc1ce
a130a2e
997ef5e
c25cf06
1e8acab
567a8ed
44402af
409cef7
e6bb03c
a04f1c9
b57ff19
e71528a
95fb5dc
afcee48
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,4 +129,7 @@ dmypy.json | |
.pyre/ | ||
.vscode | ||
.DS_Store | ||
*.csv | ||
*.csv | ||
|
||
# IDE | ||
.idea/** |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import sys | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Qu'est-ce que fait ce script ? Il permet d'aller sauvegarder des données ? |
||
sys.path.append("../..") | ||
import bechdelai.data.wikipedia as wiki | ||
import bechdelai.data.tmdb as tmdb | ||
import process_couples as pc | ||
import outputformat as ouf | ||
import pandas as pd | ||
from datetime import datetime | ||
import requests | ||
import io | ||
import spacy | ||
from spacy import displacy | ||
from spacy.matcher import Matcher | ||
from spacy.tokens import Span | ||
from spacy.matcher import PhraseMatcher | ||
from pathlib import Path | ||
|
||
|
||
class Movie:
    """A film together with its Wikipedia plot and its cast table.

    Everything is fetched eagerly: constructing a ``Movie`` immediately
    calls the ``wiki``, ``pc`` and ``tmdb`` helpers (presumably remote
    lookups -- confirm against those modules).

    Attributes set by ``__init__``:
        title: Title used for every lookup.
        release_year: Year used to disambiguate lookups, or ``None``.
        plot: Text of the Wikipedia "Plot" section, or ``None`` when no
            candidate page yielded one.
        cast_wiki: Whatever ``pc.get_cast_from_wikipedia`` returns.
        cast: Cast DataFrame built by ``get_cast_tmdb``.
    """

    def __init__(self, title, release_year=None):
        self.title = title
        self.release_year = release_year
        self.plot = self.get_plot()
        # cast_wiki has to exist before get_cast_tmdb() runs: it reads it.
        self.cast_wiki = self.get_cast_wiki()
        self.cast = self.get_cast_tmdb()

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        return "Film : {}".format(self.title)

    @staticmethod
    def _plot_queries(title, release_year):
        """Return the Wikipedia page titles to try, most specific first.

        The "(<year> film)" variant is only produced when a year is known,
        so a missing year never yields a "(None film)" query.
        """
        queries = []
        if release_year is not None:
            queries.append("{} ({} film)".format(title, release_year))
        queries.append(title + " (film)")
        queries.append(title)
        return queries

    def get_plot(self):
        """Return the "Plot" section of the first candidate page that works.

        A candidate is skipped when ``wiki.get_section_text`` raises
        ``ValueError``. Returns ``None`` if every candidate is skipped.
        """
        for query in self._plot_queries(self.title, self.release_year):
            try:
                return wiki.get_section_text(query, ["Plot"])["Plot"]  # to improve
            except ValueError:
                continue
        return None

    def get_cast_wiki(self):
        """Return the cast found by ``pc.get_cast_from_wikipedia``."""
        return pc.get_cast_from_wikipedia(self.title, self.release_year)

    def get_cast_tmdb(self):
        """Build the cast DataFrame from TMDB, corrected with ``cast_wiki``.

        Requires ``self.cast_wiki`` to be set. The returned frame has its
        double quotes replaced by single quotes, non-ASCII characters
        dropped from the ``name`` and ``character`` columns, and an added
        ``age_at_release`` column.
        """
        movie_id = tmdb.get_best_tmdb_id(self.title, self.release_year)

        # Casting data from TMDB, reconciled with the Wikipedia cast.
        data = tmdb.get_movie_cast_from_id(movie_id)
        tmdb_cast = pd.DataFrame(data["cast"])
        cast_df = pc.correct_cast_with_wikipedia(tmdb_cast, self.cast_wiki)

        # Only keep single quotation marks.
        cast_df.replace(regex=r'\"', value="'", inplace=True)

        # Strip accents: decompose, then drop whatever is not ASCII.
        for column in ("name", "character"):
            cast_df[column] = (
                cast_df[column]
                .str.normalize("NFKD")
                .str.encode("ascii", errors="ignore")
                .str.decode("utf-8")
            )

        # The release date is needed to compute each performer's age.
        release_date = tmdb.get_movie_details_from_id(movie_id)["release_date"]
        release_date = datetime.strptime(release_date, "%Y-%m-%d")
        cast_df["age_at_release"] = pc.compute_cast_age(cast_df, release_date)

        return cast_df
|
||
def main():
    """Score the relationships of three sample movies and save each as CSV.

    For every movie, a ``Movie`` is built, its cast and plot are handed to
    ``pc.compute_relationships_in_movie`` together with the relationship
    phrases, and the result is written to a CSV file in the working
    directory. The result for the last movie is printed.
    """
    verbs = ['kisses', 'sleeps with', 'goes on a date with', 'has sex with', 'marries', 'is in love with', 'is in couple with',
             'is the father of', 'is the mother of', 'is a friend of', 'is in the family of', 'is the enemy of']

    # (title, release year, output file)
    movies = [
        ("Harry Potter and the Goblet of Fire", 2005, 'hp4.csv'),
        ("Call Me by Your Name", 2017, 'call_me.csv'),
        ("The Big Lebowski", 1998, 'lebowski.csv'),
    ]

    ans = None
    for title, year, csv_name in movies:
        movie = Movie(title, year)
        # Every movie goes through the same (cast, plot, verbs) call; the
        # first one used to pass the Movie object itself instead.
        ans = pc.compute_relationships_in_movie(movie.cast, movie.plot, verbs)
        ans.to_csv(csv_name)

    print(ans)


if __name__ == "__main__":
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import sys | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On peut soit garder pour le moment toute la démo Streamlit dans les notebooks, soit la sortir dans un autre repo (peut-être dans un second temps). Si des scripts sont importants -> les mettre dans la librairie. |
||
sys.path.append("../..") | ||
from age_gap_automation import Movie | ||
import process_couples as pc | ||
import bechdelai.data.wikipedia as wiki | ||
import bechdelai.data.tmdb as tmdb | ||
|
||
import streamlit as st | ||
import pandas as pd | ||
import plotly.express as px | ||
|
||
# One row per movie offered in the selector: (title, scores CSV, release year).
_MOVIES = (
    ("Harry Potter and the Goblet of Fire", "hp4.csv", 2005),
    ("Call me by your name", "call_me.csv", 2017),
    ("The Big Lebowski", "lebowski.csv", 1998),
    ("Love Actually", "love_actually.csv", 2003),
)

# Title -> CSV file the relationship scores are read from.
MOVIE_FILES = {title: csv_name for title, csv_name, _ in _MOVIES}
# Title -> release year given to Movie().
MOVIE_YEARS = {title: year for title, _, year in _MOVIES}

# Phrases that count as a romantic relationship.
LOVE_VERBS = [
    'kisses',
    'sleeps with',
    'goes on a date with',
    'has sex with',
    'marries',
    'is in love with',
    'is in couple with',
]
# Phrases scored when relationships are computed: the romantic ones plus
# the two parent relations.
VERBS = LOVE_VERBS + ['is the father of', 'is the mother of']
|
||
@st.cache
def load_data_from_file(file):
    """Read the CSV at *file* into a DataFrame.

    The function is wrapped by ``st.cache``.
    """
    frame = pd.read_csv(file)
    return frame
def load_data(movie):
    """Compute relationship scores for *movie* from its cast and plot.

    Delegates to ``pc.compute_relationships_in_movie`` with the module's
    ``VERBS`` list and returns its result unchanged.
    """
    cast, plot = movie.cast, movie.plot
    return pc.compute_relationships_in_movie(cast, plot, VERBS)
|
||
|
||
def _star_details(cast, name, character, star_id):
    """Gather what the page shows about one performer of a couple.

    Age and gender come from the first row of *cast* whose ``name`` equals
    *name*. ``image`` is the file path of the performer's first TMDB
    profile picture, or ``None`` when TMDB lists no picture.
    """
    matches = cast[cast['name'] == name]
    profiles = tmdb.get_person_image_from_id(star_id)["profiles"]
    return {
        'name': name,
        'character': character,
        'age': matches['age_at_release'].iloc[0],
        'gender': matches['gender'].iloc[0],
        'image': profiles[0]["file_path"] if profiles else None,
    }


def main():
    """Render the Streamlit page listing a movie's romantic couples.

    The user picks a movie; its relationship scores are read from the
    matching CSV file, or computed when that file does not exist. The best
    scored row of each (star1, star2) pair is kept, and up to 10 romantic
    rows with a score of at least 0.7 are displayed, younger performer
    first, with both pictures and a disabled slider showing the two ages.
    """
    image_base = 'https://image.tmdb.org/t/p/original'
    max_couples = 10
    min_score = 0.7

    st.set_page_config(layout="wide")
    title = st.selectbox("Choose a movie:", list(MOVIE_FILES.keys()))
    st.title(title)
    st.subheader('Romantic relationships')

    movie = Movie(title, MOVIE_YEARS[title])
    cast = movie.cast

    try:
        scores = load_data_from_file(MOVIE_FILES[title])
    except FileNotFoundError:
        with st.spinner('Wait for it...'):
            scores = load_data(movie)

    # Work on new frames rather than sorting/deduplicating in place:
    # load_data_from_file is wrapped by st.cache, and its returned object
    # must not be mutated.
    scores = scores.sort_values('score', ascending=False)
    # TODO: also drop duplicates where star1 and star2 are swapped.
    scores = scores.drop_duplicates(['star1', 'star2'], keep='first')

    shown = 0
    for i, row in scores.iterrows():

        if row.question not in LOVE_VERBS:
            continue

        # Rows are sorted by score, so the first low score ends the list.
        if shown == max_couples or row.score < min_score:
            break

        first = _star_details(cast, row.star1, row.character1, row.star_id1)
        second = _star_details(cast, row.star2, row.character2, row.star_id2)
        if first['age'] > second['age']:
            star_younger, star_older = second, first
        else:
            star_younger, star_older = first, second

        st.subheader('{} and {}'.format(star_younger['character'], star_older['character']))
        st.write('They were played by {} and {} respectively. '.format(star_younger['name'], star_older['name']))
        st.write('Age gap: ', row.age_gap)

        # col4 and col5 are currently left empty.
        col1, col2, col3, col4, col5 = st.columns([1.5, 5, 1.5, 2, 10])

        if star_younger['image'] is not None:
            col1.image(image_base + star_younger['image'], width=100)

        # Read-only slider used purely to visualise the two ages.
        col2.slider(
            '',
            10, 50,
            (star_younger['age'], star_older['age']),
            disabled=True, key="slider_" + str(i))

        if star_older['image'] is not None:
            col3.image(image_base + star_older['image'], width=100)

        # TODO: relationship_true = col4.radio('Is this relationship true?', ['Yes', 'No'], key="radio_" + str(i))
        shown += 1


if __name__ == "__main__":
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Utilise plutôt ici `from .scrap import get_json_from_url` en import relatif, ça permet d'éviter des bugs.