From 7675dbfc00bce890b6906a85b00a444b06c6c32c Mon Sep 17 00:00:00 2001
From: Pedro Baltazar
Date: Sat, 24 Aug 2024 12:21:04 +0200
Subject: [PATCH] remove dev scripts

---
 autotest.Rexec                           | 14 ------
 dev.r                                    | 62 ------------------------
 misc/spiderbar_issue_2_minimal_example.R | 38 ---------------
 3 files changed, 114 deletions(-)
 delete mode 100644 autotest.Rexec
 delete mode 100644 dev.r
 delete mode 100644 misc/spiderbar_issue_2_minimal_example.R

diff --git a/autotest.Rexec b/autotest.Rexec
deleted file mode 100644
index 93f1f54..0000000
--- a/autotest.Rexec
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/Rscript
-
-library(testthat)
-
-if( Sys.info()["nodename"]=="ZUKD208" ){
-  auto_test(
-    code_path="C:/Dropbox/RPackages/robotstxt/r",
-    test_path="C:/Dropbox/RPackages/robotstxt/tests/testthat"
-  )
-}
-
-if( Sys.info()["sysname"]=="Linux" ){
-  auto_test_package(pkg = "~/Dropbox/RPackages/robotstxt", reporter = "summary")
-}
diff --git a/dev.r b/dev.r
deleted file mode 100644
index 8f51e78..0000000
--- a/dev.r
+++ /dev/null
@@ -1,62 +0,0 @@
-# ok
-if( request$status < 400 ){
-  rtxt <-
-    httr::content(
-      request,
-      encoding = encoding,
-      as = "text"
-    )
-
-  # check if robots.txt is parsable
-  if ( is_valid_robotstxt(rtxt) ){
-    rt_cache[[domain]] <- request
-  }else{
-    # dump file
-    fname_tmp <-
-      tempfile(pattern = "robots_", fileext = ".txt")
-
-    writeLines(
-      text = rtxt,
-      con = fname_tmp,
-      useBytes = TRUE
-    )
-
-    # give back a digest of the retrieved file
-    if( warn ){
-      message(
-        "\n\n",
-        "[domain] ", domain, " --> ", fname_tmp,
-        "\n\n",
-        substring(paste(rtxt, collapse = "\n"), 1, 200),"\n", "[...]",
-        "\n\n"
-      )
-    }
-
-
-    # found file but could not parse it - can happen, everything is allowed
-    # --> treated as if there was no file
-    warning(paste0(
-      "get_robotstxt(): ", domain, "; Not valid robots.txt."
-    ))
-    rtxt <- ""
-    rt_cache[[domain]] <- request
-  }
-}
-
-# not found - can happen, everything is allowed
-if( request$status == 404 ){
-  if(warn){
-    warning(paste0(
-      "get_robotstxt(): ", domain, "; HTTP status: ", request$status
-    ))
-  }
-  rtxt <- ""
-  rt_cache[[domain]] <- request
-}
-
-# not ok - diverse
-if( !(request$status == 404 | request$status < 400) ){
-  stop(paste0(
-    "get_robotstxt(): ", domain, "; HTTP status: ", request$status
-  ))
-}
diff --git a/misc/spiderbar_issue_2_minimal_example.R b/misc/spiderbar_issue_2_minimal_example.R
deleted file mode 100644
index d2c3f13..0000000
--- a/misc/spiderbar_issue_2_minimal_example.R
+++ /dev/null
@@ -1,38 +0,0 @@
-rtxt <- "# robots.txt zu http://www.example.org/\n\nUser-agent: UniversalRobot/1.0\nUser-agent: mein-Robot\nDisallow: /quellen/dtd/\n\nUser-agent: *\nDisallow: /unsinn/\nDisallow: /temp/\nDisallow: /newsticker.shtml"
-
-
-paths_allowed(
-  paths = "/temp/some_file.txt",
-  robotstxt_list = list(rtxt),
-  check_method = "spiderbar",
-  bot = "*"
-)
-## FALSE
-
-
-paths_allowed(
-  paths = "/temp/some_file.txt",
-  robotstxt_list = list(rtxt),
-  check_method = "spiderbar",
-  bot = "mein-Robot"
-)
-## TRUE
-
-
-paths_allowed(
-  paths = "/temp/some_file.txt",
-  robotstxt_list = list(rtxt),
-  check_method = "robotstxt",
-  bot = "*"
-)
-## FALSE
-
-
-paths_allowed(
-  paths = "/temp/some_file.txt",
-  robotstxt_list = list(rtxt),
-  check_method = "robotstxt",
-  bot = "mein-Robot"
-)
-## TRUE
-