Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace charset lib #580

Merged
merged 2 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions ctx.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package goproxy

import (
"crypto/tls"
"mime"
"net/http"
"regexp"
)

// ProxyCtx is the Proxy context, contains useful information about every request. It is passed to
Expand Down Expand Up @@ -79,15 +79,15 @@ func (ctx *ProxyCtx) Warnf(msg string, argv ...interface{}) {
ctx.printf("WARN: "+msg, argv...)
}

var charsetFinder = regexp.MustCompile("charset=([^ ;]*)")

// Charset tries to infer the character set of the response from its headers.
// It returns the empty string if the character set is unknown.
// Currently it looks for charset=<charset> in the Content-Type header of the response.
func (ctx *ProxyCtx) Charset() string {
charsets := charsetFinder.FindStringSubmatch(ctx.Resp.Header.Get("Content-Type"))
if charsets == nil {
return ""
contentType := ctx.Resp.Header.Get("Content-Type")
if _, params, err := mime.ParseMediaType(contentType); err == nil {
if cs, ok := params["charset"]; ok {
return cs
}
}
return charsets[1]
return ""
}
8 changes: 2 additions & 6 deletions ext/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@ go 1.20

require (
github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c
github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31
)

require (
golang.org/x/net v0.33.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/net v0.33.0
golang.org/x/text v0.21.0
)
2 changes: 0 additions & 2 deletions ext/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c h1:yWAGp1CjD1mQGLUsADqPn5s1n2AkGAX33XLDUgoXzyo=
github.com/elazarl/goproxy v0.0.0-20241217120900-7711dfa3811c/go.mod h1:P73liMk9TZCyF9fXG/RyMeSizmATvpvy3ZS61/1eXn4=
github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31 h1:DE4LcMKyqAVa6a0CGmVxANbnVb7stzMmPkQiieyNmfQ=
github.com/rogpeppe/go-charset v0.0.0-20190617161244-0dc95cdf6f31/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
Expand Down
42 changes: 11 additions & 31 deletions ext/html/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,11 @@ import (
"strings"

"github.com/elazarl/goproxy"
"github.com/rogpeppe/go-charset/charset"
_ "github.com/rogpeppe/go-charset/data"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
)

var IsHtml goproxy.RespCondition = goproxy.ContentTypeIs("text/html")

var IsCss goproxy.RespCondition = goproxy.ContentTypeIs("text/css")

var IsJavaScript goproxy.RespCondition = goproxy.ContentTypeIs("text/javascript",
"application/javascript")

var IsJson goproxy.RespCondition = goproxy.ContentTypeIs("text/json")

var IsXml goproxy.RespCondition = goproxy.ContentTypeIs("text/xml")

var IsWebRelatedText goproxy.RespCondition = goproxy.ContentTypeIs("text/html",
"text/css",
"text/javascript", "application/javascript",
"text/xml",
"text/json")
var IsHtml = goproxy.ContentTypeIs("text/html")

// HandleString will receive a function that filters a string, and will convert the
// request body to a utf8 string, according to the charset specified in the Content-Type
Expand Down Expand Up @@ -58,21 +43,16 @@ func HandleStringReader(f func(r io.Reader, ctx *goproxy.ProxyCtx) io.Reader) go
}

if strings.ToLower(charsetName) != "utf-8" {
r, err := charset.NewReader(charsetName, resp.Body)
if err != nil {
ctx.Warnf("Cannot convert from %v to utf-8: %v", charsetName, err)
tr, _ := charset.Lookup(charsetName)
if tr == nil {
ctx.Warnf("Cannot convert from %s to utf-8: not found", charsetName)
return resp
}
tr, err := charset.TranslatorTo(charsetName)
if err != nil {
ctx.Warnf("Can't translate to %v from utf-8: %v", charsetName, err)
return resp
}
if err != nil {
ctx.Warnf("Cannot translate to %v: %v", charsetName, err)
return resp
}
newr := charset.NewTranslatingReader(f(r, ctx), tr)

// Pass UTF-8 data to the callback f() function and convert its
// result back to the original encoding
r := transform.NewReader(resp.Body, tr.NewDecoder())
newr := transform.NewReader(f(r, ctx), tr.NewEncoder())
resp.Body = &readFirstCloseBoth{io.NopCloser(newr), resp.Body}
} else {
// No translation is needed; the body is already UTF-8.
Expand Down
1 change: 0 additions & 1 deletion ext/html/html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ type ConstantServer int

func (s ConstantServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/plain; charset=iso-8859-8")
//w.Header().Set("Content-Type","text/plain; charset=cp-1255")
w.Write([]byte{0xe3, 0xf3})
}

Expand Down
Loading