Skip to content

Commit

Permalink
Add headers field in ruleset. Enable Google Cache.
Browse files Browse the repository at this point in the history
  • Loading branch information
mms-gianni committed Nov 9, 2023
1 parent d3c995d commit 6c1f58e
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 58 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
domains: # Additional domains to apply the rule
- www.example.com
- www.beispiel.de
headers:
x-forwarded-for: none # override X-Forwarded-For header or delete with none
referer: none # override Referer header or delete with none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
cookie: privacy=1
regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"
Expand All @@ -138,7 +143,7 @@ See in [ruleset.yaml](ruleset.yaml) for an example.
- domain: www.anotherdomain.com # Domain where the rule applies
paths: # Paths where the rule applies
- /article
googleCache: false # Search also in Google Cache
googleCache: false # Use Google Cache to fetch the content
regexRules: # Regex rules to apply
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"
Expand Down
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func main() {
app := fiber.New(
fiber.Config{
Prefork: *prefork,
GETOnly: true,
},
)

Expand Down
133 changes: 78 additions & 55 deletions handlers/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,45 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
log.Println(u.String() + urlQuery)
}

rule := fetchRule(u.Host, u.Path)

if rule.GoogleCache {
u, err = url.Parse("https://webcache.googleusercontent.com/search?q=cache:" + u.String())
if err != nil {
return "", nil, nil, err
}
}

// Fetch the site
client := &http.Client{}
req, _ := http.NewRequest("GET", u.String()+urlQuery, nil)
req.Header.Set("User-Agent", UserAgent)
req.Header.Set("X-Forwarded-For", ForwardedFor)
req.Header.Set("Referer", u.String())
req.Header.Set("Host", u.Host)

if rule.Headers.UserAgent != "" {
req.Header.Set("User-Agent", rule.Headers.UserAgent)
} else {
req.Header.Set("User-Agent", UserAgent)
}

if rule.Headers.XForwardedFor != "" {
if rule.Headers.XForwardedFor != "none" {
req.Header.Set("X-Forwarded-For", rule.Headers.XForwardedFor)
}
} else {
req.Header.Set("X-Forwarded-For", ForwardedFor)
}

if rule.Headers.Referer != "" {
if rule.Headers.Referer != "none" {
req.Header.Set("Referer", rule.Headers.Referer)
}
} else {
req.Header.Set("Referer", u.String())
}

if rule.Headers.Cookie != "" {
req.Header.Set("Cookie", rule.Headers.Cookie)
}

resp, err := client.Do(req)

if err != nil {
Expand All @@ -79,11 +111,12 @@ func fetchSite(urlpath string, queries map[string]string) (string, *http.Request
return "", nil, nil, err
}

body := rewriteHtml(bodyB, u)
log.Print("rule", rule)
body := rewriteHtml(bodyB, u, rule)
return body, req, resp, nil
}

func rewriteHtml(bodyB []byte, u *url.URL) string {
func rewriteHtml(bodyB []byte, u *url.URL, rule Rule) string {
// Rewrite the HTML
body := string(bodyB)

Expand All @@ -104,7 +137,7 @@ func rewriteHtml(bodyB []byte, u *url.URL) string {
body = strings.ReplaceAll(body, "href=\"https://"+u.Host, "href=\"/https://"+u.Host+"/")

if os.Getenv("RULESET") != "" {
body = applyRules(u.Host, u.Path, body)
body = applyRules(body, rule)
}
return body
}
Expand Down Expand Up @@ -169,67 +202,57 @@ func loadRules() RuleSet {
return ruleSet
}

func applyRules(domain string, path string, body string) string {
func fetchRule(domain string, path string) Rule {
if len(rulesSet) == 0 {
return body
return Rule{}
}

rule := Rule{}
for _, rule := range rulesSet {
domains := rule.Domains
domains = append(domains, rule.Domain)
for _, ruleDomain := range domains {
if ruleDomain != domain {
continue
}
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
continue
}
for _, regexRule := range rule.RegexRules {
re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace)
}
for _, injection := range rule.Injections {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
if err != nil {
log.Fatal(err)
}
if injection.Replace != "" {
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace)
}
if injection.Append != "" {
doc.Find(injection.Position).AppendHtml(injection.Append)
}
if injection.Prepend != "" {
doc.Find(injection.Position).PrependHtml(injection.Prepend)
}
body, err = doc.Html()
if err != nil {
log.Fatal(err)
if ruleDomain == domain {
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
continue
}
// return first match
return rule
}
}
}

return body
return rule
}

type Rule struct {
Match string `yaml:"match"`
Replace string `yaml:"replace"`
}
func applyRules(body string, rule Rule) string {
if len(rulesSet) == 0 {
return body
}

type RuleSet []struct {
Domain string `yaml:"domain"`
Domains []string `yaml:"domains,omitempty"`
Paths []string `yaml:"paths,omitempty"`
GoogleCache bool `yaml:"googleCache,omitempty"`
RegexRules []Rule `yaml:"regexRules"`
Injections []struct {
Position string `yaml:"position"`
Append string `yaml:"append"`
Prepend string `yaml:"prepend"`
Replace string `yaml:"replace"`
} `yaml:"injections"`
for _, regexRule := range rule.RegexRules {
re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace)
}
for _, injection := range rule.Injections {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
if err != nil {
log.Fatal(err)
}
if injection.Replace != "" {
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace)
}
if injection.Append != "" {
doc.Find(injection.Position).AppendHtml(injection.Append)
}
if injection.Prepend != "" {
doc.Find(injection.Position).PrependHtml(injection.Prepend)
}
body, err = doc.Html()
if err != nil {
log.Fatal(err)
}
}

return body
}

func StringInSlice(s string, list []string) bool {
Expand Down
2 changes: 1 addition & 1 deletion handlers/proxy.test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func TestRewriteHtml(t *testing.T) {
</html>
`

actual := rewriteHtml(bodyB, u)
actual := rewriteHtml(bodyB, u, Rule{})
assert.Equal(t, expected, actual)
}

Expand Down
28 changes: 28 additions & 0 deletions handlers/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package handlers

type Regex struct {
Match string `yaml:"match"`
Replace string `yaml:"replace"`
}

type RuleSet []Rule

type Rule struct {
Domain string `yaml:"domain"`
Domains []string `yaml:"domains,omitempty"`
Paths []string `yaml:"paths,omitempty"`
Headers struct {
UserAgent string `yaml:"user-agent,omitempty"`
XForwardedFor string `yaml:"x-forwarded-for,omitempty"`
Referer string `yaml:"referer,omitempty"`
Cookie string `yaml:"cookie,omitempty"`
} `yaml:"headers,omitempty"`
GoogleCache bool `yaml:"googleCache,omitempty"`
RegexRules []Regex `yaml:"regexRules"`
Injections []struct {
Position string `yaml:"position"`
Append string `yaml:"append"`
Prepend string `yaml:"prepend"`
Replace string `yaml:"replace"`
} `yaml:"injections"`
}
20 changes: 19 additions & 1 deletion ruleset.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
- domain: www.example.com
domains:
- www.beispiel.com
- www.beispiel.de
googleCache: true
headers:
x-forwarded-for: none
referer: none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
cookie: privacy=1
regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"
Expand Down Expand Up @@ -77,6 +83,10 @@
- domains:
- www.nytimes.com
- www.time.com
headers:
ueser-agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
cookie: nyt-a=; nyt-gdpr=0; nyt-geo=DE; nyt-privacy=1
referer: https://www.google.com/
injections:
- position: head
append: |
Expand Down Expand Up @@ -146,3 +156,11 @@
headimage.forEach(image => { image.style.filter = ''; });
});
</script>
- domain: www.medium.com
domains:
- medium.com
headers:
referer: https://t.co/x?amp=1
x-forwarded-for: none
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36
cookie:

0 comments on commit 6c1f58e

Please sign in to comment.