update HTML, update docs app and spinner, update print msg, delete image preview
claromes committed Jun 24, 2024
1 parent ac094de commit ccb53fe
Showing 6 changed files with 113 additions and 110 deletions.
178 changes: 89 additions & 89 deletions app/app.py
@@ -1,5 +1,5 @@
 import base64
-from datetime import datetime
+from datetime import datetime, timedelta

 import streamlit as st

@@ -13,13 +13,13 @@
 PAGE_ICON = "assets/parthenon.png"
 TITLE = "assets/waybacktweets.png"
-PREVIEW_IMAGE = "assets/preview_image.jpg"
 DOWNLOAD = "assets/download.svg"

 collapse = None
 matchtype = None
-start_date = datetime(2006, 1, 1)
+start_date = datetime.now() - timedelta(days=365 * 2)
 end_date = datetime.now()
+min_date = datetime(2006, 1, 1)

 # ------ Verbose Mode Configuration ------ #
@@ -81,7 +81,7 @@
 # ------ Requestings ------ #


-@st.cache_data(ttl=600, show_spinner=True)
+@st.cache_data(ttl=600, show_spinner=False)
 def wayback_tweets(
     username,
     collapse,
@@ -105,15 +105,15 @@ def wayback_tweets(
     return archived_tweets


-@st.cache_data(ttl=600, show_spinner=True)
+@st.cache_data(ttl=600, show_spinner=False)
 def tweets_parser(archived_tweets, username, field_options):
     parser = TweetsParser(archived_tweets, username, field_options)
     parsed_tweets = parser.parse()

     return parsed_tweets


-@st.cache_data(ttl=600, show_spinner=True)
+@st.cache_data(ttl=600, show_spinner=False)
 def tweets_exporter(parsed_tweets, username, field_options):
     exporter = TweetsExporter(parsed_tweets, username, field_options)

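These hunks turn off the automatic `show_spinner` on the cached functions; further down, the commit wraps the call sites in explicit `st.spinner` blocks instead. A minimal sketch of that caching-plus-spinner split, with a hypothetical `fetch_data` standing in for the app's real request functions:

```python
import time

import streamlit as st


@st.cache_data(ttl=600, show_spinner=False)  # cache for 10 minutes, no built-in spinner
def fetch_data(query: str) -> list:
    time.sleep(2)  # stand-in for a slow network request
    return [f"result for {query}"]


# The caller owns the spinner, so its text can be tailored per step,
# and cache hits render instantly with no spinner flash.
with st.spinner("Fetching data"):
    results = fetch_data("someuser")

st.write(results)
```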
@@ -135,11 +135,11 @@ def tweets_exporter(parsed_tweets, username, field_options):
 )
 st.write("Retrieve archived tweets CDX data in CSV, JSON, and HTML formats.")

-st.caption(
+st.write(
     "This application uses the Wayback Tweets Python package, which can be used as a module or as a standalone command line tool. [Read the documentation](https://claromes.github.io/waybacktweets)."  # noqa: E501
 )

-st.caption(
+st.write(
     "To access the legacy version of Wayback Tweets [click here](https://waybacktweets-legacy.streamlit.app)."  # noqa: E501
 )
@@ -150,13 +150,14 @@ def tweets_exporter(parsed_tweets, username, field_options):
 username = st.text_input("Username *", key="username", placeholder="Without @")

 with st.expander("Filtering"):
-    start_date = datetime(2006, 1, 1)
-    end_date = datetime.now()

+    st.caption(
+        ":orange[A large date range takes a long time to process, and the app's resources may not be sufficient. Try to perform searches with smaller ranges to get faster results.]"  # noqa: E501
+    )
     st.session_state.archived_timestamp_filter = st.date_input(
         "Tweets saved between",
         (start_date, end_date),
-        start_date,
+        min_date,
         end_date,
         format="YYYY/MM/DD",
         help="Using the `from` and `to` filters. Format: YYYY/MM/DD",
@@ -178,21 +179,11 @@ def tweets_exporter(parsed_tweets, username, field_options):
         help="Allows for a simple way to scroll through the results",
     )

-    col3, col4 = st.columns(2)
-
-    with col3:
-        not_available = st.checkbox(
-            "Only tweets not available",
-            key="not_available",
-            help="Checks if the archived URL still exists on Twitter",
-        )
-
-    with col4:
-        unique = st.checkbox(
-            "Only unique Wayback Machine URLs",
-            key="unique",
-            help="Filtering by the collapse option using the `urlkey` field and the URL Match Scope `prefix`",  # noqa: E501
-        )
+    unique = st.checkbox(
+        "Only unique Wayback Machine URLs",
+        key="unique",
+        help="Filtering by the collapse option using the `urlkey` field and the URL Match Scope `prefix`",  # noqa: E501
+    )


 query = st.button("Query", type="primary", use_container_width=True)
@@ -208,102 +199,111 @@
         matchtype = "prefix"

     try:
-        wayback_tweets = wayback_tweets(
-            st.session_state.current_username,
-            collapse,
-            st.session_state.archived_timestamp_filter[0],
-            st.session_state.archived_timestamp_filter[1],
-            limit,
-            offset,
-            matchtype,
-        )
+        with st.spinner(
+            f"Waybacking @{st.session_state.current_username}'s archived tweets"
+        ):
+            wayback_tweets = wayback_tweets(
+                st.session_state.current_username,
+                collapse,
+                st.session_state.archived_timestamp_filter[0],
+                st.session_state.archived_timestamp_filter[1],
+                limit,
+                offset,
+                matchtype,
+            )

         if not wayback_tweets:
             st.error("No data was saved due to an empty response.")
             st.stop()

-        parsed_tweets = tweets_parser(
-            wayback_tweets, st.session_state.current_username, FIELD_OPTIONS
-        )
+        with st.spinner(
+            f"Parsing @{st.session_state.current_username}'s archived tweets"
+        ):
+            parsed_tweets = tweets_parser(
+                wayback_tweets, st.session_state.current_username, FIELD_OPTIONS
+            )

-        df, file_name = tweets_exporter(
-            parsed_tweets, st.session_state.current_username, FIELD_OPTIONS
-        )
+        df, file_name = tweets_exporter(
+            parsed_tweets, st.session_state.current_username, FIELD_OPTIONS
+        )

         csv_data = df.to_csv(index=False)
         json_data = df.to_json(orient="records", lines=False)
         html = HTMLTweetsVisualizer(username, json_data)
         html_content = html.generate()

-        st.session_state.count = len(df)
-        st.write(f"**{st.session_state.count} URLs have been captured**")
+        # -- Rendering -- #

-        # -- HTML -- #
+        if csv_data and json_data and html_content:
+            st.session_state.count = len(df)
+            st.write(f"**{st.session_state.count} URLs have been captured**")

-        st.header("HTML", divider="gray")
-        st.write(
-            f"Visualize tweets more efficiently through iframes. Download the @{st.session_state.current_username}'s archived tweets in HTML."  # noqa: E501
-        )
+            # -- HTML -- #

-        col5, col6 = st.columns([1, 18])
+            st.header("HTML", divider="gray")
+            st.write(
+                f"Visualize tweets more efficiently through iframes. Download the @{st.session_state.current_username}'s archived tweets in HTML."  # noqa: E501
+            )

-        with col5:
-            st.image(DOWNLOAD, width=22)
+            col5, col6 = st.columns([1, 18])

-        with col6:
-            b64_html = base64.b64encode(html_content.encode()).decode()
-            href_html = f"data:text/html;base64,{b64_html}"
+            with col5:
+                st.image(DOWNLOAD, width=22)

-            st.markdown(
-                f'<a href="{href_html}" download="{file_name}.html" title="Download {file_name}.html">{file_name}.html</a>',  # noqa: E501
-                unsafe_allow_html=True,
-            )
+            with col6:
+                b64_html = base64.b64encode(html_content.encode()).decode()
+                href_html = f"data:text/html;base64,{b64_html}"

-        st.image(PREVIEW_IMAGE, "Preview image")
+                st.markdown(
+                    f'<a href="{href_html}" download="{file_name}.html" title="Download {file_name}.html">{file_name}.html</a>',  # noqa: E501
+                    unsafe_allow_html=True,
+                )

-        # -- CSV -- #
+            # -- CSV -- #

-        st.header("CSV", divider="gray")
-        st.write(
-            "Check the data returned in the dataframe below and download the file."
-        )
+            st.header("CSV", divider="gray")
+            st.write(
+                "Check the data returned in the dataframe below and download the file."
+            )

-        col7, col8 = st.columns([1, 18])
+            col7, col8 = st.columns([1, 18])

-        with col7:
-            st.image(DOWNLOAD, width=22)
+            with col7:
+                st.image(DOWNLOAD, width=22)

-        with col8:
-            b64_csv = base64.b64encode(csv_data.encode()).decode()
-            href_csv = f"data:file/csv;base64,{b64_csv}"
+            with col8:
+                b64_csv = base64.b64encode(csv_data.encode()).decode()
+                href_csv = f"data:file/csv;base64,{b64_csv}"

-            st.markdown(
-                f'<a href="{href_csv}" download="{file_name}.csv" title="Download {file_name}.csv">{file_name}.csv</a>',  # noqa: E501
-                unsafe_allow_html=True,
-            )
+                st.markdown(
+                    f'<a href="{href_csv}" download="{file_name}.csv" title="Download {file_name}.csv">{file_name}.csv</a>',  # noqa: E501
+                    unsafe_allow_html=True,
+                )

-        st.dataframe(df, use_container_width=True)
+            st.dataframe(df, use_container_width=True)

-        # -- JSON -- #
+            # -- JSON -- #

-        st.header("JSON", divider="gray")
-        st.write("Check the data returned in JSON format below and download the file.")
+            st.header("JSON", divider="gray")
+            st.write(
+                "Check the data returned in JSON format below and download the file."
+            )

-        col9, col10 = st.columns([1, 18])
+            col9, col10 = st.columns([1, 18])

-        with col9:
-            st.image(DOWNLOAD, width=22)
+            with col9:
+                st.image(DOWNLOAD, width=22)

-        with col10:
-            b64_json = base64.b64encode(json_data.encode()).decode()
-            href_json = f"data:file/json;base64,{b64_json}"
+            with col10:
+                b64_json = base64.b64encode(json_data.encode()).decode()
+                href_json = f"data:file/json;base64,{b64_json}"

-            st.markdown(
-                f'<a href="{href_json}" download="{file_name}.json" title="Download {file_name}.json">{file_name}.json</a>',  # noqa: E501
-                unsafe_allow_html=True,
-            )
+                st.markdown(
+                    f'<a href="{href_json}" download="{file_name}.json" title="Download {file_name}.json">{file_name}.json</a>',  # noqa: E501
+                    unsafe_allow_html=True,
+                )

-        st.json(json_data, expanded=False)
+            st.json(json_data, expanded=False)
     except TypeError as e:
         st.error(
             f"""
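The rendering block repeated above for HTML, CSV, and JSON builds its download links by hand rather than with `st.download_button`: each file body is base64-encoded into a `data:` URI and rendered as an `<a download>` tag via `st.markdown`. A stripped-down sketch of the technique, with made-up file contents and name:

```python
import base64

import streamlit as st

# Hypothetical file body and name; the app derives these from the query results.
file_name = "example"
csv_data = "timestamp,original\n20240624000000,https://example.com/\n"

# Base64-encode the text and embed it in a data: URI, so the <a download>
# tag serves the file without a round trip to the server.
b64_csv = base64.b64encode(csv_data.encode()).decode()
href_csv = f"data:file/csv;base64,{b64_csv}"

st.markdown(
    f'<a href="{href_csv}" download="{file_name}.csv">{file_name}.csv</a>',
    unsafe_allow_html=True,
)
```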
Binary file removed assets/preview_image.jpg
2 changes: 1 addition & 1 deletion waybacktweets/_cli.py
@@ -121,7 +121,7 @@ def main(
         username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype
     )

-    print("Making a request to the Internet Archive...")
+    print(f"Waybacking @{username}'s archived tweets...")
     archived_tweets = api.get()

     if archived_tweets:
12 changes: 6 additions & 6 deletions waybacktweets/api/export.py
@@ -97,23 +97,23 @@ def save_to_json(self) -> None:
         """
         Saves the DataFrame to a JSON file.
         """
-        json_file_path = f"{self.filename}.json"
-        self.dataframe.to_json(json_file_path, orient="records", lines=False)
+        json_path = f"{self.filename}.json"
+        self.dataframe.to_json(json_path, orient="records", lines=False)

-        print(f"Saved to {json_file_path}")
+        print(f"Saved to {json_path}")

     def save_to_html(self) -> None:
         """
         Saves the DataFrame to an HTML file.
         """
-        json_file_path = f"{self.filename}.json"
+        json_path = f"{self.filename}.json"

-        if not os.path.exists(json_file_path):
+        if not os.path.exists(json_path):
             self.save_to_json()

         html_file_path = f"{self.filename}.html"

-        html = HTMLTweetsVisualizer(self.username, json_file_path, html_file_path)
+        html = HTMLTweetsVisualizer(self.username, json_path, html_file_path)

         html_content = html.generate()
         html.save(html_content)
3 changes: 2 additions & 1 deletion waybacktweets/api/parse.py
@@ -279,7 +279,8 @@ def parse(self, print_progress=False) -> Dict[str, List[Any]]:
             task = None
             if print_progress:
                 task = progress.add_task(
-                    f"Waybacking @{self.username} tweets\n", total=len(futures)
+                    f"Parsing @{self.username}'s archived tweets\n",
+                    total=len(futures),
                 )

             for future in as_completed(futures):
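The `progress.add_task` call in this hunk appears to follow rich's `Progress` API, ticking a task as thread-pool futures complete. A self-contained sketch of that pattern, with a dummy `parse_one` in place of the real tweet parsing:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

from rich.progress import Progress


def parse_one(item: int) -> int:
    return item * 2  # dummy workload standing in for parsing one archived tweet


with Progress() as progress:
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(parse_one, i) for i in range(20)]
        task = progress.add_task("Parsing archived tweets", total=len(futures))

        for future in as_completed(futures):
            future.result()  # surface any worker exception
            progress.advance(task)  # tick the bar once per completed future
```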