From dd2bca94d109bfce537a712050e870ed78737aa2 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Mon, 31 Jul 2023 10:17:09 +0200 Subject: [PATCH] [SoFIFA] Fix read_leagues Fixes #317 --- soccerdata/sofifa.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/soccerdata/sofifa.py b/soccerdata/sofifa.py index 70797e0e..4bfef09c 100644 --- a/soccerdata/sofifa.py +++ b/soccerdata/sofifa.py @@ -1,4 +1,5 @@ """Scraper for http://sofifa.com.""" +import json import re from datetime import timedelta from itertools import product @@ -74,6 +75,7 @@ def __init__( no_store=no_store, data_dir=data_dir, ) + self.rate_limit = 1 if versions == "latest": self.versions = self.read_versions().tail(n=1) elif versions == "all": @@ -93,19 +95,22 @@ def read_leagues(self) -> pd.DataFrame: pd.DataFrame """ # read home page (overview) - filepath = self.data_dir / "index.html" - reader = self.get(SO_FIFA_API, filepath) + filepath = self.data_dir / "leagues.json" + urlmask = SO_FIFA_API + "/api/league" + reader = self.get(urlmask, filepath) + response = json.load(reader) # extract league links leagues = [] - tree = html.parse(reader) - for node in tree.xpath("//select[@id='choices-lg']/optgroup/option"): - leagues.append( - { - "league_id": int(node.get("value")), - "league": node.text, - } - ) + for node in response["data"]: + for child in node["childs"]: + leagues.append( + { + "league_id": child["id"], + "league": f'[{child["nationName"]}] {child["value"]}', + } + ) + print(pd.DataFrame(leagues)) return ( pd.DataFrame(leagues) .pipe(self._translate_league)