Update maps

Binnette · Feb 26, 2024 · bdd8007 · bdd8007
1 parent 7408082
commit bdd8007
Show file tree

Hide file tree

Showing 25 changed files with 27,242 additions and 12,177 deletions.
diff --git a/Scrapping/Scrapping.md b/Scrapping/Scrapping.md
@@ -96,18 +96,19 @@ for p in *.jpeg; do
 done
 ```
 
-## [DEPRECATED] Scrapping hikes
+## Scrape hikes for CSV
 
 ```js
 var events = [];
 document.querySelectorAll("ul.w-full > li").forEach(eventElement => {
-  var timestamp = Number(eventElement.querySelector("time").getAttribute("datetime"));
-  var date = new Date(timestamp);
-  var dateString = `'${date.toISOString().slice(0, 10)}`;
-  var suffix = "", km = "", dplus = "", ele = "", comment = "", attendees = "";
+  var strDateTime = eventElement.querySelector("time").textContent;
+  var iso = strDateTime.replace(/,|Sun|AM/g, "").trim() + "Z";
+  var date = new Date(iso);
+  var dateString = `${date.toISOString().slice(0, 10)}`;
+  var km = "", dplus = "", people = "";
   var attendeesSection = eventElement.querySelector(".items-center span.hidden");
   if (attendeesSection) {
-    attendees = Number(attendeesSection.innerText.split(" ")[0]);
+    people = Number(attendeesSection.innerText.split(" ")[0]);
   }
   var titleElement = eventElement.querySelector(".ds-font-title-3");
   var title = titleElement.innerText.replaceAll('"', "'");
@@ -119,7 +120,10 @@ document.querySelectorAll("ul.w-full > li").forEach(eventElement => {
   } else if (title.includes('🥾') || title.toLowerCase().includes('hike')) {
     type = 'Hike';
   }
-  var url = eventElement.querySelector("a").href;
+  var url = eventElement.querySelector("a").href.trim("/");
+  if (url.endsWith("/")) url = url.slice(0, -1);
+  parts = url.split("/");
+  var id = parts[parts.length - 1];
   var trails = [];
   eventElement.querySelectorAll("a[href*='https://s.42l.fr']").forEach(trailElement => {
     trails.push(trailElement.href);
@@ -128,9 +132,12 @@ document.querySelectorAll("ul.w-full > li").forEach(eventElement => {
   km = distanceMatch ? parseFloat(distanceMatch[1]) : '';
   var dplusMatch = eventElement.textContent.match(/D\+: ([0-9]+)m/);
   dplus = dplusMatch ? parseFloat(dplusMatch[1]) : '';
-  events.unshift([dateString, suffix, km, dplus, ele, attendees, `"${title}"`, type, comment, url, trails.join(";")].join(","));
+  var albumTitle = title.replaceAll(":", " ").replaceAll("  ", " ").replaceAll(" ", "-").trim();
+  var album = `${dateString}-${albumTitle}.html`;
+  events.unshift([`'${dateString}`, "", km, dplus, "", people, `"${title}"`, type, "", id, url, trails.join(";"), "", "", "", `"${album}"`, ""].join(","));
 });
-var headers = ["date", "suffix", "km", "dplus", "ele", "attendees", "title", "type", "comment", "url", "trails"];
+var headers = ["Date", "Suffix", "KM", "Dplus", "Top", "People", "Name", "Type", "Comment", "id", "EventLink", "TrailShortLink", "TrailFullLink", "Trail1", "Trail2", "Album", "HavePhoto"];
+
 events.unshift(headers.join(","));
 events.push(headers.join(","));
 events.join('\n');

diff --git a/Scrapping/data.json b/Scrapping/data.json
diff --git a/Scrapping/eventsJsonToCsv.py b/Scrapping/eventsJsonToCsv.py
@@ -5,7 +5,7 @@
 import requests
 
 # Open the data.json file and load it as a python dictionary
-with open("data.json") as f:
+with open("data.json", encoding='utf-8') as f:
   data = json.load(f)
 
 # Get the events from the data dictionary

diff --git a/Scrapping/eventsJsonToEventsMarkdown.py b/Scrapping/eventsJsonToEventsMarkdown.py
@@ -46,7 +46,7 @@ def merge_events_lists(list1, list2):
       # Update the value of dict1 with the value of dict2
       dict1[key].update(dict2[key])
     else:
-      print(f'event not found: {key} - {dict1[key]["title"]}')
+      print(f'🔴 Event not found in CSV: {key} - {dict1[key]["title"]}')
 
   # Convert the dictionary values to a list
   return list(dict1.values())
@@ -73,14 +73,14 @@ def format_description(description):
 
   # Loop through the string list
   for line in string_list:
-    if re.match("\*\*.*\*\*", line):
+    if re.match(r"\*\*.*\*\*", line):
       line = line.replace("**", "")
 
-    if re.match("^=+$", line):
+    if re.match(r"^=+$", line):
       line = "---------------"
-    elif re.match("^[\s=]+$", line):
+    elif re.match(r"^[\s=]+$", line):
       line = ""
-    elif re.match("^=.*", line):
+    elif re.match(r"^=.*", line):
       line = line.lstrip("=")
       line = line.rstrip("=")
       # Add "##" at the start of the line
@@ -108,8 +108,9 @@ def createMarkdownFileForEvent(event):
   create = event["createdTime"]
   going = event["going"]["totalCount"]
 
-  if "Suffix" not in event:
-    print(f'🔴 No Suffix for hike {title}')
+  if "Suffix" not in event or "KM" not in event:
+    print(f'🔴 Hike not found in CSV file: {start} - {title}')
+    return
 
   suffix = event["Suffix"]
   km = event["KM"]
@@ -122,9 +123,6 @@ def createMarkdownFileForEvent(event):
   trailFullLink = event["TrailFullLink"]
   album = event["Album"]
 
-  if f'{people}' != f'{going}':
-    print(f'🔴 Error people={people} and going={going}')
-
   # Convert the date and time strings to datetime objects
   start = datetime.datetime.fromisoformat(start)
   end = datetime.datetime.fromisoformat(end)
@@ -145,11 +143,14 @@ def createMarkdownFileForEvent(event):
   if len(suffix) > 1:
     date_str_with_suffix = f'{date_str}-{suffix}'
 
+  if f'{people}' != f'{going}':
+    print(f'🟡 Warning csv.people={people} json.going={going} for hike: {date_str} - {title}')
+
   # Create the markdown file name
   md_file = f"../Stats/events/{date_str_with_suffix}.md"
 
   # Write the markdown content to the file
-  with open(md_file, "w") as f:
+  with open(md_file, "w", encoding='utf-8') as f:
     f.write(f"---\n")
     f.write(f"layout: default\n")
     f.write(f"title: {title}\n")
@@ -174,7 +175,7 @@ def createMarkdownFileForEvent(event):
     f.write(f"- [Album]({albumUrl})\n")
     f.write(f"- [Meetup event]({event_url})\n")
 
-    print(f"Markdown file created: {md_file}")
+    #print(f"Markdown file created: {md_file}")
     return {
       "file": f"{date_str_with_suffix}.md",
       "date": start,
@@ -206,7 +207,7 @@ def create_events_index(events):
   md_file = "../Stats/events/index.md"
 
   # Write the markdown content to the file
-  with open(md_file, "w") as f:
+  with open(md_file, "w", encoding='utf-8') as f:
     f.write(f"---\n")
     f.write(f"layout: default\n")
     f.write(f"title: Events index\n")
@@ -231,7 +232,7 @@ def create_events_index(events):
 csv_events = csv_to_array(csv_file)
 
 # Open the json file and load the data
-with open(json_file, "r") as f:
+with open(json_file, "r", encoding='utf-8') as f:
     data = json.load(f)
 
 # Get the events from the data dictionary
@@ -246,6 +247,9 @@ def create_events_index(events):
 # Create one markdown file per merged event and collect the value returned for each
 markdown_files = [createMarkdownFileForEvent(event) for event in merged_events]
 
+# create a new list without None values
+markdown_files = [e for e in markdown_files if e is not None]
+
 # Sort the list by date in ascending order using a lambda function
 markdown_files.sort(key=lambda e: e['date'])