# Git unified diff for src/scraper.sh (blob 697e840 -> bb1d27c, mode 100755).
#
# The single hunk (@@ -3,7 +3,7 @@) replaces the `pdf_url=` assignment. The only
# difference between the removed (-) and added (+) line is the grep -P pattern:
# the new one inserts `[\/]{0,1}` between `itispaleocapa\.edu\.it` and `\?url=`,
# so a slash before `?url=` is now optional instead of forbidden.
#
# In both versions the pipeline does the following:
#   * grep -o -P prints only the text after `\K`, i.e. what follows `url=` up
#     to and including `.pdf`;
#   * sed rewrites every `+` to a space and every `%` to `\x`;
#   * xargs -0 passes the result to `printf "%b"`, which expands the resulting
#     backslash escapes.
# The last context line downloads the result to 'orario.pdf' with wget.
#
# NOTE(review): the line structure of this patch appears to have been collapsed
# (header, hunk header and hunk lines share one physical line), so it is
# unlikely to apply as-is -- regenerate it from the repository before use.
# NOTE(review): the lookbehind in the `name=` context line reads `(?<=\` and is
# then interrupted by a line break; some text looks to be missing there
# (possibly an opening tag matching the `\<\/h2\>` lookahead) -- confirm
# against the real file.
# NOTE(review): the hunk header declares 7 lines per side, but fewer statements
# are distinguishable here; blank context lines may have been dropped -- verify.
diff --git a/src/scraper.sh b/src/scraper.sh index 697e840..bb1d27c 100755 --- a/src/scraper.sh +++ b/src/scraper.sh @@ -3,7 +3,7 @@ echo 'Starting scraping' html=$(wget --header "Cookie: pasw_law_cookie=yes" -qO - https://www.itispaleocapa.edu.it/orario-classi/) name=$(echo "$html" | grep -o -P '(?<=\

).*(?=\<\/h2\>)') -pdf_url=$(echo $(echo "$html" | grep -o -P 'src="https:\/\/www\.itispaleocapa\.edu\.it\?url=(\K.*\.pdf)') | sed 's@+@ @g;s@%@\\x@g' | xargs -0 printf "%b") +pdf_url=$(echo $(echo "$html" | grep -o -P 'src="https:\/\/www\.itispaleocapa\.edu\.it[\/]{0,1}\?url=(\K.*\.pdf)') | sed 's@+@ @g;s@%@\\x@g' | xargs -0 printf "%b") wget -qO 'orario.pdf' $pdf_url