diff --git a/elm/web/rhub.py b/elm/web/rhub.py
index 926ab897..191f9a4e 100644
--- a/elm/web/rhub.py
+++ b/elm/web/rhub.py
@@ -127,8 +127,14 @@ def _scrape_category(self, soup_inst):
 
         category (str)
         """
-        category = soup_inst.find('span',
-                                  {'class': 'type_classification'}).text
+        try:
+            category = soup_inst.find('span',
+                                      {'class':
+                                       'type_classification'}).text
+        except AttributeError:
+            category = soup_inst.find('span',
+                                      {'class':
+                                       'type_classification_parent'}).text
 
         return category
 
@@ -179,7 +185,7 @@ def build_meta(self):
                                      'authors', 'year', 'url', 'doi',
                                      'pdf_url', 'category'))
 
-        for link in self.all_links[:20]:  # quantity control here #
+        for link in self.all_links[:50]:  # quantity control here #
             with urlopen(link) as page:
                 html = page.read().decode("utf-8")
             meta_soup = BeautifulSoup(html, "html.parser")
@@ -285,7 +291,7 @@ def scrape_publications(self, pdf_dir, txt_dir):
         os.makedirs(pdf_dir, exist_ok=True)
         os.makedirs(txt_dir, exist_ok=True)
 
-        url_list = self.all_links[:20]  # quantity control here #
+        url_list = self.all_links[:50]  # quantity control here #
 
         for pub in url_list:
             with urlopen(pub) as page:
@@ -351,7 +357,7 @@ def build_meta(self):
                                      'email', 'url', 'fn', 'category'
                                      ))
 
-        for link in url_list[:20]:  # quantity control here #
+        for link in url_list[:50]:  # quantity control here #
             with urlopen(link) as page:
                 html = page.read().decode("utf-8")
             meta_soup = BeautifulSoup(html, "html.parser")
@@ -604,7 +610,7 @@ def scrape_profiles(self, out_dir):
             Text file containing information from the profile.
         """
         os.makedirs(out_dir, exist_ok=True)
-        url_list = self.profile_links[:20]  # quantity control here #
+        url_list = self.profile_links[:50]  # quantity control here #
 
         for i, prof in enumerate(url_list):
             f = os.path.basename(prof) + '.txt'