fix rhub _scrape_category() bug

NREL · May 13, 2024 · ea8db86 · ea8db86
1 parent 1b11e8a
commit ea8db86
Showing 1 changed file with 12 additions and 6 deletions.
diff --git a/elm/web/rhub.py b/elm/web/rhub.py
@@ -127,8 +127,14 @@ def _scrape_category(self, soup_inst):
         category (str)
         """
 
-        category = soup_inst.find('span',
-                                  {'class': 'type_classification'}).text
+        try:
+            category = soup_inst.find('span',
+                                      {'class':
+                                       'type_classification'}).text
+        except AttributeError:
+            category = soup_inst.find('span',
+                                      {'class':
+                                       'type_classification_parent'}).text
 
         return category
 
@@ -179,7 +185,7 @@ def build_meta(self):
                                                   'authors', 'year',
                                                   'url', 'doi',
                                                   'pdf_url', 'category'))
-        for link in self.all_links[:20]:  # quantity control here #
+        for link in self.all_links[:50]:  # quantity control here #
             with urlopen(link) as page:
                 html = page.read().decode("utf-8")
             meta_soup = BeautifulSoup(html, "html.parser")
@@ -285,7 +291,7 @@ def scrape_publications(self, pdf_dir, txt_dir):
 
         os.makedirs(pdf_dir, exist_ok=True)
         os.makedirs(txt_dir, exist_ok=True)
-        url_list = self.all_links[:20]  # quantity control here #
+        url_list = self.all_links[:50]  # quantity control here #
 
         for pub in url_list:
             with urlopen(pub) as page:
@@ -351,7 +357,7 @@ def build_meta(self):
                                               'email', 'url', 'fn',
                                               'category'
                                               ))
-        for link in url_list[:20]:  # quantity control here #
+        for link in url_list[:50]:  # quantity control here #
             with urlopen(link) as page:
                 html = page.read().decode("utf-8")
             meta_soup = BeautifulSoup(html, "html.parser")
@@ -604,7 +610,7 @@ def scrape_profiles(self, out_dir):
         Text file containing information from the profile.
         """
         os.makedirs(out_dir, exist_ok=True)
-        url_list = self.profile_links[:20]  # quantity control here #
+        url_list = self.profile_links[:50]  # quantity control here #
 
         for i, prof in enumerate(url_list):
             f = os.path.basename(prof) + '.txt'