add code to handle multiple names
toutestprismemeca committed Dec 7, 2022
1 parent fd986f6 commit 816519c
Showing 1 changed file with 46 additions and 26 deletions.
72 changes: 46 additions & 26 deletions Etoiles_Europe.ipynb
@@ -14,8 +14,7 @@
"pydref = Pydref()\n",
"\n",
"import pandas as pd, requests, re, string, time, urllib3, os, dotenv\n",
"requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)\n",
"HEADERS = os.environ.get('HEADERS')"
"requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)"
]
},
{
@@ -73,7 +72,7 @@
"etoiles = etoiles.assign(person = etoiles[\"prenom\"] +\" \"+ etoiles[\"nom\"])\n",
"\n",
"# etoiles = etoiles.assign(type_de_prix = \"Etoiles de l'Europe\")\n",
"etoiles = etoiles.assign(edition=str(edition), type_de_prix=\"Etoiles de l'Europe\", annee=str(an), person=etoiles[\"prenom\"] +\" \"+ etoiles[\"nom\"])"
"etoiles = etoiles.assign(edition=str(edition), type_de_prix=\"Etoiles de l'Europe\", edition=str(an), person=etoiles[\"prenom\"] +\" \"+ etoiles[\"nom\"])"
]
},
{
@@ -138,6 +137,38 @@
" e['sexe'] = e['sexe'].capitalize()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"project_dict = {}\n",
"for e in et:\n",
" list_to_change=['prenom', 'nom', 'idref', 'idref_lien', 'sexe_code', 'sexe']\n",
" project_id = e['project_id']\n",
" if project_id not in project_dict:\n",
" new_elt = {'project_id': project_id}\n",
" for col in ['mention', 'edition', 'type_de_prix', 'id_struct']:\n",
" new_elt[col] = e[col]\n",
" for col in list_to_change:\n",
" new_elt[col] = []\n",
" else:\n",
" new_elt = project_dict[project_id]\n",
" for col in list_to_change: \n",
" new_elt[col].append(e[col])\n",
" project_dict[project_id] = new_elt\n",
"\n",
"for e in project_dict:\n",
" elt = project_dict[e]\n",
" for f in elt:\n",
" if isinstance(elt[f], list):\n",
" project_dict[e][f] = ';'.join(elt[f])\n",
" \n",
"tmp=pd.DataFrame(project_dict.values())\n",
"tmp=tmp.to_dict(orient=\"records\")"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -147,16 +178,12 @@
"# fonction de match pour les organisations: lien avec id_struct rnsr, siren,...\n",
"\n",
"def struct_matcher(data):\n",
" url_match = \"http://185.161.45.213/organizations/organizations/_match?id=\"\n",
" url = url_match + data.get(\"id_struct\")\n",
"# print(url)\n",
" rq = requests.get(url=url, headers=HEADERS)\n",
" print(rq.text)\n",
" print(data.get('id_struct'))\n",
" url = f\"http://185.161.45.213/organizations/organizations/_match?id={data.get('id_struct')}\"\n",
" rq = requests.get(url=url, headers={'accept':'application/json', 'Authorization': os.environ.get('token')})\n",
" response = rq.json()\n",
"# print(response)\n",
" if response.get(\"hits\") == 1:\n",
" result = response.get(\"data\")[0].get(\"id\")\n",
"# print(result)\n",
" return result"
]
},
@@ -167,7 +194,7 @@
"outputs": [],
"source": [
"# match pour récupérer le id_scanr des structures et créer un url scanr\n",
"for e in et: \n",
"for e in tmp: \n",
" e[\"structure_identifiant\"] = struct_matcher(e)\n",
" if e[\"structure_identifiant\"]:\n",
" e[\"structure_lien_scanr\"] = \"https://scanr.enseignementsup-recherche.gouv.fr/structure/\" + e.get(\"structure_identifiant\")\n",
@@ -188,7 +215,7 @@
" url_match = \"http://185.161.45.213/organizations/scanr/\"\n",
" if data.get(\"structure_identifiant\"):\n",
" url = url_match + data.get(\"structure_identifiant\")\n",
" rq = requests.get(url = url, headers=HEADERS)\n",
" rq = requests.get(url = url, headers={'accept':'application/json', 'Authorization': os.environ.get('token')})\n",
" response = {\n",
" \"structure_libelle\": rq.json().get(\"label\", {}).get(\"default\"),\n",
" \"structure_sigle\": rq.json().get(\"acronym\", {}).get(\"fr\"),\n",
@@ -211,7 +238,7 @@
"outputs": [],
"source": [
"# lancement fonction recup infos des structures et remplace les sauts de ligne par un espace (pas sur que ça marche !)\n",
"for e in et:\n",
"for e in tmp:\n",
" if struct_info(e) is not None:\n",
" e.update(struct_info(e))\n",
" if e.get(\"lat\"):\n",
@@ -229,7 +256,7 @@
" url_match = \"http://185.161.45.213/datastore/geocodes/\"\n",
" if data.get(\"commune_code\"):\n",
" url = url_match + data.get(\"commune_code\")\n",
" rq = requests.get(url = url, headers=HEADERS)\n",
" rq = requests.get(url = url, headers={'accept':'application/json', 'Authorization': os.environ.get('token')})\n",
" response = {\n",
" \"commune_nom\": rq.json().get(\"com_nom\"),\n",
" \"unite_urbaine_code\": rq.json().get(\"uu_id\"),\n",
@@ -248,7 +275,7 @@
"metadata": {},
"outputs": [],
"source": [
"for e in et:\n",
"for e in tmp:\n",
" if geocod(e): \n",
" e.update(geocod(e))"
]
@@ -264,7 +291,7 @@
" id = str(data.get(\"project_id\"))\n",
" if id:\n",
" url = url_match + id\n",
" rq = requests.get(url = url, headers=HEADERS)\n",
" rq = requests.get(url = url, headers={'accept':'application/json', 'Authorization': os.environ.get('token')})\n",
" if rq.status_code == 200:\n",
" if rq.json().get(\"type\") == \"H2020\":\n",
" response = {\n",
@@ -301,7 +328,7 @@
"metadata": {},
"outputs": [],
"source": [
"for e in et: \n",
"for e in tmp: \n",
" e[\"projet_lien_scanr\"] = \"https://scanr.enseignementsup-recherche.gouv.fr/project/\" + str(e[\"project_id\"])\n",
" e[\"projet_lien_cordis\"] = \"https://cordis.europa.eu/project/id/\" + str(e[\"project_id\"])\n",
" \n",
@@ -330,7 +357,7 @@
"metadata": {},
"outputs": [],
"source": [
"export = pd.DataFrame.from_dict(et, orient='columns')\n",
"export = pd.DataFrame.from_dict(tmp, orient='columns')\n",
"export = pd.concat([full, export], ignore_index=True)"
]
},
@@ -340,7 +367,7 @@
"metadata": {},
"outputs": [],
"source": [
"export = export[['type_de_prix', 'edition', 'annee', 'mention', 'prenom', 'nom', 'sexe_code', 'sexe', 'idref', \n",
"export = export[['type_de_prix', 'edition', 'mention', 'prenom', 'nom', 'sexe_code', 'sexe', 'idref', \n",
" 'projet_acronyme', 'projet_titre', 'appel_a_projet_code', 'thematique', \n",
" 'programme_code', 'programme_nom', 'financement_type', 'financement_name', 'project_id', 'projet_resume',\n",
" 'structure_identifiant', 'structure_libelle', 'structure_sigle', \n",
@@ -361,13 +388,6 @@
"export.to_csv(PATH + \"open data/fr-esr-etoile-de-l-europe.csv\", sep=\";\", \n",
" encoding=\"ANSI\", na_rep=\"\", index=False, line_terminator='\\r')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {