diff --git a/ctx.go b/ctx.go index b372f7d4..acd9d11e 100644 --- a/ctx.go +++ b/ctx.go @@ -2,8 +2,8 @@ package goproxy import ( "crypto/tls" + "mime" "net/http" - "regexp" ) // ProxyCtx is the Proxy context, contains useful information about every request. It is passed to @@ -79,15 +79,15 @@ func (ctx *ProxyCtx) Warnf(msg string, argv ...interface{}) { ctx.printf("WARN: "+msg, argv...) } -var charsetFinder = regexp.MustCompile("charset=([^ ;]*)") - // Will try to infer the character set of the request from the headers. // Returns the empty string if we don't know which character set it used. // Currently it will look for charset= in the Content-Type header of the request. func (ctx *ProxyCtx) Charset() string { - charsets := charsetFinder.FindStringSubmatch(ctx.Resp.Header.Get("Content-Type")) - if charsets == nil { - return "" + contentType := ctx.Resp.Header.Get("Content-Type") + if _, params, err := mime.ParseMediaType(contentType); err == nil { + if cs, ok := params["charset"]; ok { + return cs + } } - return charsets[1] + return "" } diff --git a/ext/go.mod b/ext/go.mod index fb5fb998..9c7d3945 100644 --- a/ext/go.mod +++ b/ext/go.mod @@ -4,10 +4,6 @@ go 1.20 require ( github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c - github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31 -) - -require ( - golang.org/x/net v0.33.0 // indirect - golang.org/x/text v0.21.0 // indirect + golang.org/x/net v0.33.0 + golang.org/x/text v0.21.0 ) diff --git a/ext/go.sum b/ext/go.sum index 88684d1d..b9cadd46 100644 --- a/ext/go.sum +++ b/ext/go.sum @@ -1,7 +1,5 @@ github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c h1:yWAGp1CjD1mQGLUsADqPn5s1n2AkGAX33XLDUgoXzyo= github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c/go.mod h1:P73liMk9TZCyF9fXG/RyMeSizmATvpvy3ZS61/1eXn4= -github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31 h1:DE4LcMKyqAVa6a0CGmVxANbnVb7stzMmPkQiieyNmfQ= -github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc= golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= diff --git a/ext/html/html.go b/ext/html/html.go index ca3128ae..cebae05a 100644 --- a/ext/html/html.go +++ b/ext/html/html.go @@ -9,26 +9,11 @@ import ( "strings" "github.com/elazarl/goproxy" - "github.com/rogpeppe/go-charset/charset" - _ "github.com/rogpeppe/go-charset/data" + "golang.org/x/net/html/charset" + "golang.org/x/text/transform" ) -var IsHtml goproxy.RespCondition = goproxy.ContentTypeIs("text/html") - -var IsCss goproxy.RespCondition = goproxy.ContentTypeIs("text/css") - -var IsJavaScript goproxy.RespCondition = goproxy.ContentTypeIs("text/javascript", - "application/javascript") - -var IsJson goproxy.RespCondition = goproxy.ContentTypeIs("text/json") - -var IsXml goproxy.RespCondition = goproxy.ContentTypeIs("text/xml") - -var IsWebRelatedText goproxy.RespCondition = goproxy.ContentTypeIs("text/html", - "text/css", - "text/javascript", "application/javascript", - "text/xml", - "text/json") +var IsHtml = goproxy.ContentTypeIs("text/html") // HandleString will receive a function that filters a string, and will convert the // request body to a utf8 string, according to the charset specified in the Content-Type @@ -58,21 +43,16 @@ func HandleStringReader(f func(r io.Reader, ctx *goproxy.ProxyCtx) io.Reader) go } if strings.ToLower(charsetName) != "utf-8" { - r, err := charset.NewReader(charsetName, resp.Body) - if err != nil { - ctx.Warnf("Cannot convert from %v to utf-8: %v", charsetName, err) + tr, _ := charset.Lookup(charsetName) + if tr == nil { + ctx.Warnf("Cannot convert from %s to utf-8: not found", charsetName) return resp } - tr, err := charset.TranslatorTo(charsetName) - if err != nil { - ctx.Warnf("Can't translate to %v from utf-8: %v", charsetName, err) - return resp - } - if err != nil { - ctx.Warnf("Cannot translate to %v: %v", charsetName, err) - return resp - } - newr := charset.NewTranslatingReader(f(r, ctx), tr) + + // Pass UTF-8 data to the callback f() function and convert its + // result back to the original encoding + r := transform.NewReader(resp.Body, tr.NewDecoder()) + newr := transform.NewReader(f(r, ctx), tr.NewEncoder()) resp.Body = &readFirstCloseBoth{io.NopCloser(newr), resp.Body} } else { //no translation is needed, already at utf-8 diff --git a/ext/html/html_test.go b/ext/html/html_test.go index 0949c1ca..cb990ce4 100644 --- a/ext/html/html_test.go +++ b/ext/html/html_test.go @@ -14,7 +14,6 @@ type ConstantServer int func (s ConstantServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/plain; charset=iso-8859-8") - //w.Header().Set("Content-Type","text/plain; charset=cp-1255") w.Write([]byte{0xe3, 0xf3}) }