Skip to content

Commit

Permalink
Replace charset lib (#580)
Browse files Browse the repository at this point in the history
* Use std library to convert between different charsets

* Use std library mime parser instead of custom regex to extract charset
  • Loading branch information
ErikPelli authored Dec 20, 2024
1 parent 8fd2e76 commit 62d1bbd
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 47 deletions.
14 changes: 7 additions & 7 deletions ctx.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package goproxy

import (
"crypto/tls"
"mime"
"net/http"
"regexp"
)

// ProxyCtx is the Proxy context, contains useful information about every request. It is passed to
Expand Down Expand Up @@ -79,15 +79,15 @@ func (ctx *ProxyCtx) Warnf(msg string, argv ...interface{}) {
ctx.printf("WARN: "+msg, argv...)
}

var charsetFinder = regexp.MustCompile("charset=([^ ;]*)")

// Charset tries to infer the character set of the response from its headers.
// It returns the empty string if the character set is unknown.
// Currently it looks for charset=<charset> in the Content-Type header of the response.
func (ctx *ProxyCtx) Charset() string {
charsets := charsetFinder.FindStringSubmatch(ctx.Resp.Header.Get("Content-Type"))
if charsets == nil {
return ""
contentType := ctx.Resp.Header.Get("Content-Type")
if _, params, err := mime.ParseMediaType(contentType); err == nil {
if cs, ok := params["charset"]; ok {
return cs
}
}
return charsets[1]
return ""
}
8 changes: 2 additions & 6 deletions ext/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@ go 1.20

require (
github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c
github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31
)

require (
golang.org/x/net v0.33.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/net v0.33.0
golang.org/x/text v0.21.0
)
2 changes: 0 additions & 2 deletions ext/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c h1:yWAGp1CjD1mQGLUsADqPn5s1n2AkGAX33XLDUgoXzyo=
github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c/go.mod h1:P73liMk9TZCyF9fXG/RyMeSizmATvpvy3ZS61/1eXn4=
github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31 h1:DE4LcMKyqAVa6a0CGmVxANbnVb7stzMmPkQiieyNmfQ=
github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
Expand Down
42 changes: 11 additions & 31 deletions ext/html/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,11 @@ import (
"strings"

"github.com/elazarl/goproxy"
"github.com/rogpeppe/go-charset/charset"
_ "github.com/rogpeppe/go-charset/data"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
)

var IsHtml goproxy.RespCondition = goproxy.ContentTypeIs("text/html")

var IsCss goproxy.RespCondition = goproxy.ContentTypeIs("text/css")

var IsJavaScript goproxy.RespCondition = goproxy.ContentTypeIs("text/javascript",
"application/javascript")

var IsJson goproxy.RespCondition = goproxy.ContentTypeIs("text/json")

var IsXml goproxy.RespCondition = goproxy.ContentTypeIs("text/xml")

var IsWebRelatedText goproxy.RespCondition = goproxy.ContentTypeIs("text/html",
"text/css",
"text/javascript", "application/javascript",
"text/xml",
"text/json")
var IsHtml = goproxy.ContentTypeIs("text/html")

// HandleString will receive a function that filters a string, and will convert the
// response body to a UTF-8 string, according to the charset specified in the Content-Type
Expand Down Expand Up @@ -58,21 +43,16 @@ func HandleStringReader(f func(r io.Reader, ctx *goproxy.ProxyCtx) io.Reader) go
}

if strings.ToLower(charsetName) != "utf-8" {
r, err := charset.NewReader(charsetName, resp.Body)
if err != nil {
ctx.Warnf("Cannot convert from %v to utf-8: %v", charsetName, err)
tr, _ := charset.Lookup(charsetName)
if tr == nil {
ctx.Warnf("Cannot convert from %s to utf-8: not found", charsetName)
return resp
}
tr, err := charset.TranslatorTo(charsetName)
if err != nil {
ctx.Warnf("Can't translate to %v from utf-8: %v", charsetName, err)
return resp
}
if err != nil {
ctx.Warnf("Cannot translate to %v: %v", charsetName, err)
return resp
}
newr := charset.NewTranslatingReader(f(r, ctx), tr)

// Pass UTF-8 data to the callback f() function and convert its
// result back to the original encoding
r := transform.NewReader(resp.Body, tr.NewDecoder())
newr := transform.NewReader(f(r, ctx), tr.NewEncoder())
resp.Body = &readFirstCloseBoth{io.NopCloser(newr), resp.Body}
} else {
// No translation is needed; the body is already UTF-8.
Expand Down
1 change: 0 additions & 1 deletion ext/html/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ type ConstantServer int

func (s ConstantServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain; charset=iso-8859-8")
//w.Header().Set("Content-Type","text/plain; charset=cp-1255")
w.Write([]byte{0xe3, 0xf3})
}

Expand Down

0 comments on commit 62d1bbd

Please sign in to comment.