-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create broken link checker cron jobs
- Loading branch information
1 parent
410ce4f
commit cc3ef0d
Showing
2 changed files
with
100 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Broken-link checker: builds a list of site pages, then runs lychee against
# each one in a matrix job and summarises the failures.
name: new link check

on:
  push:
  pull_request: # temporarily execute on all branches
  schedule:
    # Cron fields are "minute hour day-of-month month day-of-week".
    # The previous value "21 40 * * *" had hour=40, which is outside 0-23 and
    # makes the workflow file invalid. Scheduled workflows run in UTC.
    # NOTE(review): assumes the intent was 21:40 — confirm the desired time.
    - cron: "40 21 * * *"

jobs:
  # Fetches the space-separated page lists and turns them into the JSON
  # matrix consumed by the link-Checker job.
  get-links:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - id: set-matrix
        run: |
          # awk replaces every space with "," so the list can be spliced
          # directly between the quotes of a JSON array below.
          flatpages=$(curl https://cantusdatabase.org/flatpages-list/ | awk '{ gsub (" ", "\",\"", $0); print}')
          articles=$(curl https://cantusdatabase.org/articles-list/ | awk '{ gsub (" ", "\",\"", $0); print}')
          list="{\"links\": [\"${flatpages}\",\"${articles}\"]}"
          echo $list
          echo "matrix=$list" >> $GITHUB_OUTPUT
  # Runs lychee once per page (matrix.links) and parses its JSON report.
  link-Checker:
    runs-on: ubuntu-latest
    needs: get-links
    strategy:
      fail-fast: false
      max-parallel: 4
      matrix: ${{fromJson(needs.get-links.outputs.matrix)}}
    steps:
      - uses: actions/checkout@v3
      - name: Link Checker
        id: lychee
        # NOTE(review): the pinned version was lost to e-mail obfuscation in
        # the extracted source ("[email protected]"); v1.8.0 is presumed —
        # confirm against the repository before merging.
        uses: lycheeverse/lychee-action@v1.8.0
        with:
          args: --exclude http:\/\/cantus\.sk.* ${{ matrix.links }}
          format: json
          # Must match FILE_LOCATION in scripts/parse_broken_link_checker_output.py
          output: /tmp/link-checker.txt
      - name: parsing output
        run: |
          echo "***Python Version***"
          python --version
          echo "***Invoking parsing script***"
          python "$GITHUB_WORKSPACE/scripts/parse_broken_link_checker_output.py" >> $GITHUB_STEP_SUMMARY
          echo "***Printing step summary***"
          cat $GITHUB_STEP_SUMMARY
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import json | ||
import sys | ||
from pathlib import Path | ||
import sys | ||
|
||
# Path the link checker writes its JSON report to.
FILE_LOCATION = "/tmp/link-checker.txt"


def classify_failures(fail_map):
    """Split the link checker's failures into real and skippable errors.

    Args:
        fail_map: Mapping of tested page -> list of failure dicts. Each
            failure has a ``status`` dict that may contain a numeric ``code``.

    Returns:
        A ``(real_errors, skip_errors)`` tuple of lists. Failures whose status
        code is 4xx are real errors; everything else (no code at all, e.g. a
        timeout, or a non-4xx code) is skippable.
    """
    real_errors = []
    skip_errors = []
    for failures in fail_map.values():  # one entry per tested page
        for failure in failures:  # one entry per broken link
            code = failure["status"].get("code")
            # A missing code (e.g. a timeout) is treated as a client-side
            # problem: it is reported but does not fail the run.
            if code and 400 <= code < 500:
                real_errors.append(failure)
            else:
                skip_errors.append(failure)
    return real_errors, skip_errors


def format_report(real_errors, skip_errors):
    """Build the Markdown report lines for the classified failures.

    Args:
        real_errors: Failures that carry a 4xx status code.
        skip_errors: Failures that are reported but tolerated.

    Returns:
        A list of Markdown lines; empty when both inputs are empty.
    """
    lines = []
    if real_errors:
        lines.append("# Broken Link")
        lines.extend(
            f"* {error['url']}: {error['status']['code']}" for error in real_errors
        )
    if skip_errors:
        lines.append("# Skippable error Link")
        # .get() so a status without a text does not abort the whole report.
        lines.extend(
            f"* {error['url']}: {error['status'].get('text', 'unknown error')}"
            for error in skip_errors
        )
    return lines


def main(file_location=FILE_LOCATION):
    """Parse the link checker report and print a Markdown summary to stdout.

    Progress messages go to stderr so that stdout contains only the summary.

    Args:
        file_location: Path of the JSON report to parse.

    Returns:
        The process exit code: 1 if any 4xx failure was found, otherwise 0.

    Raises:
        KeyError: If the report has no ``fail_map`` entry.
    """
    print(f"Running: {sys.argv[0]}", file=sys.stderr)

    report_path = Path(file_location)

    # No report file at all is treated as "nothing to report".
    if not report_path.exists():
        print("# ✅ No Broken Link")
        return 0
    print("# Broken Link found, parsing needed", file=sys.stderr)

    # Loading link checker output result
    with open(report_path, encoding="utf-8") as f:
        print(f"Parsing the json data for {file_location}", file=sys.stderr)
        link_checker_result = json.load(f)

    fail_map = link_checker_result["fail_map"]
    if not fail_map:
        print("# ✅ No Broken Link")
        return 0

    real_errors, skip_errors = classify_failures(fail_map)
    for line in format_report(real_errors, skip_errors):
        print(line)

    # Only 4xx failures fail the run.
    return 1 if real_errors else 0


if __name__ == "__main__":
    sys.exit(main())