From 0939833b0cca2a50054bf1db437db62295c1e299 Mon Sep 17 00:00:00 2001 From: Sami Jaktholm Date: Mon, 12 Jun 2017 18:08:40 +0300 Subject: [PATCH 1/5] add http proxy support to request_uri() These changes introduce support for making HTTP requests through a forward proxy when using the high-level request_uri() API. The new set_proxy_options() function can be used to configure HTTP proxy settings. This method takes a table as a parameter that can include the following fields: * http_proxy - a URI to a proxy that should be used for http:// requests * https_proxy - a URI to a proxy that should be used for https:// requests * no_proxy - a comma separated list of hosts / IPs that should not go through the configured proxy When request_uri() is called with an http:// URI and http_proxy has been configured for the client instance, the connection will be established to the proxy host. Once the connection has been established, the HTTP request is sent to the proxy host as if the peer on the other end of the connection was the remote server. The only difference to the non-proxy case is that the path sent to the proxy is actually a full URI instead of a relative path. When request_uri() is called with an https:// URI and https_proxy has been configured for the client instance, the connection will be established to the proxy host. Once the connection has been established, we go ahead and perform a CONNECT request to open a TCP tunnel to the remote server. If the proxy gives a success response to the CONNECT request, the client continues by performing the TLS handshake with the remote server and making the request as if there was no proxy configured. Some tests have been included which verify that the proxy options are interpreted correctly in different cases and that http_proxy works as real proxies expect. This is about as much as there is to test without actually including a real forward proxy in the test harness. Manual testing has been performed against squid and tinyproxy. 
Fixes #63 --- README.md | 12 +++ lib/resty/http.lua | 103 ++++++++++++++++++- t/14-host-header.t | 27 +++++ t/16-http-proxy.t | 247 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 388 insertions(+), 1 deletion(-) create mode 100644 t/16-http-proxy.t diff --git a/README.md b/README.md index aee6028..5f21ff2 100644 --- a/README.md +++ b/README.md @@ -192,6 +192,18 @@ Note that calling this instead of `close` is "safe" in that it will conditionall In case of success, returns `1`. In case of errors, returns `nil, err`. In the case where the conneciton is conditionally closed as described above, returns `2` and the error string `connection must be closed`. +## set_proxy_options + +`syntax: httpc:set_proxy_options(opts)` + +Configure an http proxy to be used with this client instance. The `opts` is a table that accepts the following fields: + +* `http_proxy` - an URI to a proxy server to be used with http requests +* `https_proxy` - an URI to a proxy server to be used with https requests +* `no_proxy` - a comma separated list of hosts that should not be proxied. + +Note that proxy options are only applied when using the high-level `request_uri()` API. 
+ ## get_reused_times `syntax: times, err = httpc:get_reused_times()` diff --git a/lib/resty/http.lua b/lib/resty/http.lua index 4949025..b8bc031 100644 --- a/lib/resty/http.lua +++ b/lib/resty/http.lua @@ -800,11 +800,65 @@ function _M.request_uri(self, uri, params) if not params.path then params.path = path end if not params.query then params.query = query end - local c, err = self:connect(host, port) + -- See if we should use a proxy to make this request + local proxy_host, proxy_port; + local proxy_uri = self:get_proxy_uri(scheme, host) + if proxy_uri then + local parsed_proxy_uri, err = self:parse_uri(proxy_uri, false) + if not parsed_proxy_uri then + return nil, err + end + + proxy_host, proxy_port = parsed_proxy_uri[2], parsed_proxy_uri[3] + end + + local c, err = self:connect(proxy_host or host, proxy_port or port) if not c then return nil, err end + if proxy_uri and scheme == "https" then + -- Make a CONNECT request to create a tunnel to the destination through + -- the proxy + local destination = host .. ":" .. port + local res, err = self:request({ + method = "CONNECT", + path = destination, + headers = { + ["Host"] = destination + } + }) + + if not res then + return nil, err + end + + if res.status < 200 or res.status > 299 then + return nil, "failed to establish a tunnel through a proxy: " .. res.status + end + + -- don't keep this connection alive as the next request could target + -- any host and re-using the tunnel for that is not possible + self.keepalive = false + end + + if proxy_uri and scheme == "http" then + -- http proxies expect to see the full URI in the request line + if port == 80 then + params.path = scheme .. "://" .. host .. path + else + params.path = scheme .. "://" .. host .. ":" .. port .. path + end + end + + if proxy_uri then + -- self:connect() set the host and port to point to the proxy server. 
As + -- the connection to the proxy has been established, set the host and port + -- to point to the actual remote endpoint at the other end of the tunnel + self.host = host + self.port = port + end + if scheme == "https" then local verify = true if params.ssl_verify == false then @@ -914,5 +968,52 @@ function _M.proxy_response(self, response, chunksize) until not chunk end +function _M.set_proxy_options(self, opts) + self.proxy_opts = opts +end + +function _M.get_proxy_uri(self, scheme, host) + if not self.proxy_opts then + return nil + end + + -- Check if the no_proxy option matches this host. Implementation adapted + -- from lua-http library (https://github.com/daurnimator/lua-http) + if self.proxy_opts.no_proxy then + if self.proxy_opts.no_proxy == "*" then + -- all hosts are excluded + return nil + end + + local no_proxy_set = {} + -- wget allows domains in no_proxy list to be prefixed by "." + -- e.g. no_proxy=.mit.edu + for host_suffix in self.proxy_opts.no_proxy:gmatch("%.?([^,]+)") do + no_proxy_set[host_suffix] = true + end + + -- From curl docs: + -- matched as either a domain which contains the hostname, or the + -- hostname itself. For example local.com would match local.com, + -- local.com:80, and www.local.com, but not www.notlocal.com. + for pos in host:gmatch("%f[^%z%.]()") do + local host_suffix = host:sub(pos, -1) + if no_proxy_set[host_suffix] then + return nil + end + end + end + + if scheme == "http" and self.proxy_opts.http_proxy then + return self.proxy_opts.http_proxy + end + + if scheme == "https" and self.proxy_opts.https_proxy then + return self.proxy_opts.https_proxy + end + + return nil +end + return _M diff --git a/t/14-host-header.t b/t/14-host-header.t index 62a6164..6ea28eb 100644 --- a/t/14-host-header.t +++ b/t/14-host-header.t @@ -165,3 +165,30 @@ GET /a [error] --- response_body Unable to generate a useful Host header for a unix domain socket. Please provide one. 
+ +=== TEST 6: Host header is correct when http_proxy is used +--- http_config + lua_package_path "$TEST_NGINX_PWD/lib/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + server { + listen *:8080; + } + +--- config + location /lua { + content_by_lua ' + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:8080" + }) + local res, err = httpc:request_uri("http://127.0.0.1:8081") + '; + } +--- request +GET /lua +--- no_error_log +[error] +--- error_log +Host: 127.0.0.1:8081 diff --git a/t/16-http-proxy.t b/t/16-http-proxy.t new file mode 100644 index 0000000..2f77048 --- /dev/null +++ b/t/16-http-proxy.t @@ -0,0 +1,247 @@ +use Test::Nginx::Socket; +use Cwd qw(cwd); + +plan tests => repeat_each() * (blocks() * 4); + +my $pwd = cwd(); + +$ENV{TEST_NGINX_RESOLVER} = '8.8.8.8'; +$ENV{TEST_NGINX_PWD} ||= $pwd; +$ENV{TEST_COVERAGE} ||= 0; + +our $HttpConfig = qq{ + lua_package_path "$pwd/lib/?.lua;/usr/local/share/lua/5.1/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + + init_by_lua_block { + if $ENV{TEST_COVERAGE} == 1 then + jit.off() + require("luacov.runner").init() + end + } +}; + +no_long_string(); +run_tests(); + +__DATA__ +=== TEST 1: get_proxy_uri returns nil if proxy is not configured +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + ngx.say(httpc:get_proxy_uri("http", "example.com")) + } + } +--- request +GET /lua +--- response_body +nil +--- no_error_log +[error] +[warn] + +=== TEST 2: get_proxy_uri matches no_proxy hosts correctly +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + + -- helper that verifies get_proxy_uri works correctly with the given + -- scheme, host and no_proxy list + function test_no_proxy(scheme, host, no_proxy) + 
httpc:set_proxy_options({ + http_proxy = "http://http_proxy.example.com", + https_proxy = "http://https_proxy.example.com", + no_proxy = no_proxy + }) + + local proxy_uri = httpc:get_proxy_uri(scheme, host) + ngx.say("scheme: ", scheme, ", host: ", host, ", no_proxy: ", no_proxy, ", proxy_uri: ", proxy_uri) + end + + -- All these match the no_proxy list + test_no_proxy("http", "example.com", nil) + test_no_proxy("http", "example.com", "*") + test_no_proxy("http", "example.com", "example.com") + test_no_proxy("http", "sub.example.com", "example.com") + test_no_proxy("http", "example.com", "example.com,example.org") + test_no_proxy("http", "example.com", "example.org,example.com") + + -- Same for https for good measure + test_no_proxy("https", "example.com", nil) + test_no_proxy("https", "example.com", "*") + test_no_proxy("https", "example.com", "example.com") + test_no_proxy("https", "sub.example.com", "example.com") + test_no_proxy("https", "example.com", "example.com,example.org") + test_no_proxy("https", "example.com", "example.org,example.com") + + -- Edge cases + + -- example.com should match .example.com in the no_proxy list (legacy behavior of wget) + test_no_proxy("http", "example.com", ".example.com") + + -- notexample.com should not match example.com in the no_proxy list (not a subdomain) + test_no_proxy("http", "notexample.com", "example.com") + } + } +--- request +GET /lua +--- response_body +scheme: http, host: example.com, no_proxy: nil, proxy_uri: http://http_proxy.example.com +scheme: http, host: example.com, no_proxy: *, proxy_uri: nil +scheme: http, host: example.com, no_proxy: example.com, proxy_uri: nil +scheme: http, host: sub.example.com, no_proxy: example.com, proxy_uri: nil +scheme: http, host: example.com, no_proxy: example.com,example.org, proxy_uri: nil +scheme: http, host: example.com, no_proxy: example.org,example.com, proxy_uri: nil +scheme: https, host: example.com, no_proxy: nil, proxy_uri: http://https_proxy.example.com +scheme: 
https, host: example.com, no_proxy: *, proxy_uri: nil +scheme: https, host: example.com, no_proxy: example.com, proxy_uri: nil +scheme: https, host: sub.example.com, no_proxy: example.com, proxy_uri: nil +scheme: https, host: example.com, no_proxy: example.com,example.org, proxy_uri: nil +scheme: https, host: example.com, no_proxy: example.org,example.com, proxy_uri: nil +scheme: http, host: example.com, no_proxy: .example.com, proxy_uri: nil +scheme: http, host: notexample.com, no_proxy: example.com, proxy_uri: http://http_proxy.example.com +--- no_error_log +[error] +[warn] + +=== TEST 3: get_proxy_uri returns correct proxy URIs for http and https URIs +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + + -- helper that configures the proxy opts as provided and checks what + -- get_proxy_uri says for the given scheme / host pair + function test_get_proxy_uri(scheme, host, http_proxy, https_proxy) + httpc:set_proxy_options({ + http_proxy = http_proxy, + https_proxy = https_proxy + }) + + local proxy_uri = httpc:get_proxy_uri(scheme, host) + ngx.say( + "scheme: ", scheme, + ", host: ", host, + ", http_proxy: ", http_proxy, + ", https_proxy: ", https_proxy, + ", proxy_uri: ", proxy_uri + ) + end + + -- http + test_get_proxy_uri("http", "example.com", "http_proxy", "https_proxy") + test_get_proxy_uri("http", "example.com", nil, "https_proxy") + + -- https + test_get_proxy_uri("https", "example.com", "http_proxy", "https_proxy") + test_get_proxy_uri("https", "example.com", "http_proxy", nil) + } + } +--- request +GET /lua +--- response_body +scheme: http, host: example.com, http_proxy: http_proxy, https_proxy: https_proxy, proxy_uri: http_proxy +scheme: http, host: example.com, http_proxy: nil, https_proxy: https_proxy, proxy_uri: nil +scheme: https, host: example.com, http_proxy: http_proxy, https_proxy: https_proxy, proxy_uri: https_proxy +scheme: https, host: 
example.com, http_proxy: http_proxy, https_proxy: nil, proxy_uri: nil +--- no_error_log +[error] +[warn] + +=== TEST 4: request_uri uses http_proxy correctly for non-standard destination ports +--- http_config + lua_package_path "$TEST_NGINX_PWD/lib/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + server { + listen *:8080; + + location / { + content_by_lua_block { + ngx.print(ngx.req.raw_header()) + } + } + } +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:8080", + https_proxy = "http://127.0.0.1:8080" + }) + + -- request should go to the proxy server + local res, err = httpc:request_uri("http://127.0.0.1:1234/target?a=1&b=2") + + if not res then + ngx.log(ngx.ERR, err) + return + end + ngx.status = res.status + ngx.say(res.body) + } + } +--- request +GET /lua +--- response_body_like +^GET http://127.0.0.1:1234/target\?a=1&b=2 HTTP/.+\r\nHost: 127.0.0.1:1234.+ +--- no_error_log +[error] +[warn] + +=== TEST 5: request_uri uses http_proxy correctly for standard destination port +--- http_config + lua_package_path "$TEST_NGINX_PWD/lib/?.lua;;"; + error_log logs/error.log debug; + resolver 8.8.8.8; + server { + listen *:8080; + + location / { + content_by_lua_block { + ngx.print(ngx.req.raw_header()) + } + } + } +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:8080", + https_proxy = "http://127.0.0.1:8080" + }) + + -- request should go to the proxy server + local res, err = httpc:request_uri("http://127.0.0.1/target?a=1&b=2") + + if not res then + ngx.log(ngx.ERR, err) + return + end + + -- the proxy echoed the raw request header and we shall pass it onwards + -- to the test harness + ngx.status = res.status + ngx.say(res.body) + } + } +--- request +GET /lua +--- response_body_like +^GET 
http://127.0.0.1/target\?a=1&b=2 HTTP/.+\r\nHost: 127.0.0.1.+ +--- no_error_log +[error] +[warn] From 5c96e1f381160d2ec853ba00b3b71a321c1ca388 Mon Sep 17 00:00:00 2001 From: Sami Jaktholm Date: Tue, 18 Jul 2017 21:33:53 +0300 Subject: [PATCH 2/5] followup: remove unneeded semicolon --- lib/resty/http.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/resty/http.lua b/lib/resty/http.lua index b8bc031..ac4233f 100644 --- a/lib/resty/http.lua +++ b/lib/resty/http.lua @@ -801,7 +801,7 @@ function _M.request_uri(self, uri, params) if not params.query then params.query = query end -- See if we should use a proxy to make this request - local proxy_host, proxy_port; + local proxy_host, proxy_port local proxy_uri = self:get_proxy_uri(scheme, host) if proxy_uri then local parsed_proxy_uri, err = self:parse_uri(proxy_uri, false) From b0e6fc42e47965b04a220b868d71e17859cbfb9e Mon Sep 17 00:00:00 2001 From: Sami Jaktholm Date: Tue, 22 Aug 2017 19:00:49 +0300 Subject: [PATCH 3/5] followup: separate proxy connection setup logic to a low-level connect_proxy() method This commit separates the logic that sets up the connection to the proxy server to a separate connect_proxy() method. This method is provided to users as a low-level API they can use similarly to the connect() method. The connect_proxy() will handle the connection establishment to the proxy server and performs the CONNECT request to setup a TCP tunnel to a https protected host. Similar to the connect() method, it is then up to the user to take care of the details that are relevant when using a proxy (i.e use absolute uris for http requests and perform a TLS handshake for https connections). There's also a new test case that verifies the CONNECT request is used properly to establish a tunnel to the remote server when TLS is used. Due to the limitations of the test framework, this case only considers the format of the outgoing CONNECT request and how the code handles errors sent by the proxy. 
Testing a full TLS tunnel is unfortunately not possible with the tools the test framework provides as it would require a real reverse proxy or a method of forwarding the TCP connection after the CONNECT request is received to a real web server that can talk TLS. --- README.md | 23 ++++++++- lib/resty/http.lua | 120 +++++++++++++++++++++++++++++---------------- t/16-http-proxy.t | 52 ++++++++++++++++++++ 3 files changed, 151 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 5f21ff2..14dfdec 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Production ready. * [new](#new) * [connect](#connect) +* [connect_proxy](#connect_proxy) * [set_timeout](#set_timeout) * [set_timeouts](#set_timeouts) * [ssl_handshake](#ssl_handshake) @@ -158,6 +159,24 @@ An optional Lua table can be specified as the last argument to this method to sp * `pool` : Specifies a custom name for the connection pool being used. If omitted, then the connection pool name will be generated from the string template `:` or ``. +## connect_proxy + +`syntax: ok, err = httpc:connect_proxy(proxy_uri, scheme, host, port)` + +Attempts to connect to the web server through the given proxy server. The method accepts the following arguments: + +* `proxy_uri` - Full URI of the proxy server to use (e.g. `http://proxy.example.com:3128/`). Note: Only `http` protocol is supported. +* `scheme` - The protocol to use between the proxy server and the remote host (`http` or `https`). If `https` is specified as the scheme, `connect_proxy()` makes a `CONNECT` request to establish a TCP tunnel to the remote host through the proxy server. +* `host` - The hostname of the remote host to connect to. +* `port` - The port of the remote host to connect to. + +If an error occurs during the connection attempt, this method returns `nil` with a string describing the error. If the connection was successfully established, the method returns `1`. 
+ +There are a few key points to keep in mind when using this API: + +* If the scheme is `https`, you need to perform the TLS handshake with the remote server manually using the `ssl_handshake()` method before sending any requests through the proxy tunnel. +* If the scheme is `http`, you need to ensure that the requests you send through the connection conform to [RFC 7230](https://tools.ietf.org/html/rfc7230) and especially [Section 5.3.2.](https://tools.ietf.org/html/rfc7230#section-5.3.2) which states that the request target must be in absolute form. In practice, this means that when you use `send_request()`, the `path` must be an absolute URI to the resource (e.g. `http://example.com/index.html` instead of just `/index.html`). + ## set_timeout `syntax: httpc:set_timeout(time)` @@ -244,7 +263,7 @@ When the request is successful, `res` will contain the following fields: * `status` The status code. * `reason` The status reason phrase. * `headers` A table of headers. Multiple headers with the same field name will be presented as a table of values. -* `has_body` A boolean flag indicating if there is a body to be read. +* `has_body` A boolean flag indicating if there is a body to be read. * `body_reader` An iterator function for reading the body in a streaming fashion. * `read_body` A method to read the entire body into a string. * `read_trailers` A method to merge any trailers underneath the headers, after reading the body. 
@@ -420,7 +439,7 @@ local res, err = httpc:request{ } ``` -If `sock` is specified, +If `sock` is specified, # Author diff --git a/lib/resty/http.lua b/lib/resty/http.lua index ac4233f..afb00f9 100644 --- a/lib/resty/http.lua +++ b/lib/resty/http.lua @@ -787,7 +787,6 @@ function _M.request_pipeline(self, requests) return responses end - function _M.request_uri(self, uri, params) params = tbl_copy(params or {}) -- Take by value @@ -801,60 +800,50 @@ function _M.request_uri(self, uri, params) if not params.query then params.query = query end -- See if we should use a proxy to make this request - local proxy_host, proxy_port local proxy_uri = self:get_proxy_uri(scheme, host) - if proxy_uri then - local parsed_proxy_uri, err = self:parse_uri(proxy_uri, false) - if not parsed_proxy_uri then - return nil, err - end - proxy_host, proxy_port = parsed_proxy_uri[2], parsed_proxy_uri[3] + -- Make the connection either through the proxy or directly + -- to the remote host + local c, err + + if proxy_uri then + c, err = self:connect_proxy(proxy_uri, scheme, host, port) + else + c, err = self:connect(host, port) end - local c, err = self:connect(proxy_host or host, proxy_port or port) if not c then return nil, err end - if proxy_uri and scheme == "https" then - -- Make a CONNECT request to create a tunnel to the destination through - -- the proxy - local destination = host .. ":" .. port - local res, err = self:request({ - method = "CONNECT", - path = destination, - headers = { - ["Host"] = destination - } - }) - - if not res then - return nil, err - end - - if res.status < 200 or res.status > 299 then - return nil, "failed to establish a tunnel through a proxy: " .. res.status + if proxy_uri then + if scheme == "http" then + -- When a proxy is used, the target URI must be in absolute-form + -- (RFC 7230, Section 5.3.2.). That is, it must be an absolute URI + -- to the remote resource with the scheme, host and an optional port + -- in place. 
+ -- + -- Since _format_request() constructs the request line by concatenating + -- params.path and params.query together, we need to modify the path + -- to also include the scheme, host and port so that the final form + -- is conformant to RFC 7230. + if port == 80 then + params.path = scheme .. "://" .. host .. path + else + params.path = scheme .. "://" .. host .. ":" .. port .. path + end end - -- don't keep this connection alive as the next request could target - -- any host and re-using the tunnel for that is not possible - self.keepalive = false - end - - if proxy_uri and scheme == "http" then - -- http proxies expect to see the full URI in the request line - if port == 80 then - params.path = scheme .. "://" .. host .. path - else - params.path = scheme .. "://" .. host .. ":" .. port .. path + if scheme == "https" then + -- don't keep this connection alive as the next request could target + -- any host and re-using the proxy tunnel for that is not possible + self.keepalive = false end - end - if proxy_uri then - -- self:connect() set the host and port to point to the proxy server. As + -- self:connect_proxy() set the host and port to point to the proxy server. As -- the connection to the proxy has been established, set the host and port - -- to point to the actual remote endpoint at the other end of the tunnel + -- to point to the actual remote endpoint at the other end of the tunnel to + -- ensure the correct Host header is added to the requests. 
self.host = host self.port = port end @@ -1016,4 +1005,51 @@ function _M.get_proxy_uri(self, scheme, host) end +function _M.connect_proxy(self, proxy_uri, scheme, host, port) + -- Parse the provided proxy URI + local parsed_proxy_uri, err = self:parse_uri(proxy_uri, false) + if not parsed_proxy_uri then + return nil, err + end + + -- Check that the scheme is http (https is not supported for + -- connections between the client and the proxy) + local proxy_scheme = parsed_proxy_uri[1] + if proxy_scheme ~= "http" then + return nil, "protocol " .. proxy_scheme .. " not supported for proxy connections" + end + + -- Make the connection to the given proxy + local proxy_host, proxy_port = parsed_proxy_uri[2], parsed_proxy_uri[3] + local c, err = self:connect(proxy_host, proxy_port) + if not c then + return nil, err + end + + if scheme == "https" then + -- Make a CONNECT request to create a tunnel to the destination through + -- the proxy. The request-target and the Host header must be in the + -- authority-form of RFC 7230 Section 5.3.3. See also RFC 7231 Section + -- 4.3.6 for more details about the CONNECT request + local destination = host .. ":" .. port + local res, err = self:request({ + method = "CONNECT", + path = destination, + headers = { + ["Host"] = destination + } + }) + + if not res then + return nil, err + end + + if res.status < 200 or res.status > 299 then + return nil, "failed to establish a tunnel through a proxy: " .. 
res.status + end + end + + return c, nil +end + return _M diff --git a/t/16-http-proxy.t b/t/16-http-proxy.t index 2f77048..bdedded 100644 --- a/t/16-http-proxy.t +++ b/t/16-http-proxy.t @@ -245,3 +245,55 @@ GET /lua --- no_error_log [error] [warn] + +=== TEST 6: request_uri makes a proper CONNECT request when proxying https resources +--- http_config eval: $::HttpConfig +--- config + location /lua { + content_by_lua_block { + local http = require "resty.http" + local httpc = http.new() + httpc:set_proxy_options({ + http_proxy = "http://127.0.0.1:12345", + https_proxy = "http://127.0.0.1:12345" + }) + + -- Slight Hack: temporarily change the module global user agent to make it + -- predictable for this test case + local ua = http._USER_AGENT + http._USER_AGENT = "test_ua" + local res, err = httpc:request_uri("https://127.0.0.1/target?a=1&b=2") + http._USER_AGENT = ua + + if not err then + -- The proxy request should fail as the TCP server listening returns + -- 403 response. We cannot really test the success case here as that + -- would require an actual reverse proxy to be implemented through + -- the limited functionality we have available in the raw TCP sockets + ngx.log(ngx.ERR, "unexpected success") + return + end + + ngx.status = 403 + ngx.say(err) + } + } +--- tcp_listen: 12345 +--- tcp_query eval_stdout +# Note: The incoming request contains CRLF line endings and print needs to +# be used here to get the same line breaks to the expected request +print "CONNECT 127.0.0.1:443 HTTP/1.1\r\nUser-Agent: test_ua\r\nHost: 127.0.0.1:443\r\n\r\n" + +# The reply cannot be successful or otherwise the client would start +# to do a TLS handshake with the proxied host and that we cannot +# do with these sockets +--- tcp_reply +HTTP/1.1 403 Forbidden +Connection: close + +--- request +GET /lua +--- error_code: 403 +--- no_error_log +[error] +[warn] From af8b08b7b96788c1be03e649b1fd01892d98c20d Mon Sep 17 00:00:00 2001 From: Sami Jaktholm Date: Sun, 22 Oct 2017 19:33:28 
+0300 Subject: [PATCH 4/5] followup: minor fixes according to pr comments This includes: * Add set_proxy_options to README TOC. * Make a copy of proxy options gotten from the user. * Rewrite no_proxy matching to use ngx.re.gmatch and friends instead of Lua's built-in gmatch functions. Since the regular expression formats are completely different, the no_proxy matching process had to be modified a bit. * Fix 14-host-header.t on systems with ipv6 support by disabling ipv6 address resolution. * Make 16-http-proxy.t more reliable by ignoring the order of the headers in the CONNECT request (the order is not guaranteed). The new check uses a regular expression to check that the CONNECT line is correct and that the headers include correct Host in some position. --- README.md | 1 + lib/resty/http.lua | 29 +++++++++++++++++++---------- t/14-host-header.t | 2 +- t/16-http-proxy.t | 6 ++---- 4 files changed, 23 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 14dfdec..c27ebf5 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Production ready. 
* [new](#new) * [connect](#connect) * [connect_proxy](#connect_proxy) +* [set_proxy_options](#set_proxy_options) * [set_timeout](#set_timeout) * [set_timeouts](#set_timeouts) * [ssl_handshake](#ssl_handshake) diff --git a/lib/resty/http.lua b/lib/resty/http.lua index afb00f9..19c01e2 100644 --- a/lib/resty/http.lua +++ b/lib/resty/http.lua @@ -15,6 +15,8 @@ local tbl_concat = table.concat local tbl_insert = table.insert local ngx_encode_args = ngx.encode_args local ngx_re_match = ngx.re.match +local ngx_re_gmatch = ngx.re.gmatch +local ngx_re_sub = ngx.re.sub local ngx_re_gsub = ngx.re.gsub local ngx_re_find = ngx.re.find local ngx_log = ngx.log @@ -958,7 +960,7 @@ function _M.proxy_response(self, response, chunksize) end function _M.set_proxy_options(self, opts) - self.proxy_opts = opts + self.proxy_opts = tbl_copy(opts) -- Take by value end function _M.get_proxy_uri(self, scheme, host) @@ -977,20 +979,27 @@ function _M.get_proxy_uri(self, scheme, host) local no_proxy_set = {} -- wget allows domains in no_proxy list to be prefixed by "." -- e.g. no_proxy=.mit.edu - for host_suffix in self.proxy_opts.no_proxy:gmatch("%.?([^,]+)") do - no_proxy_set[host_suffix] = true + for host_suffix in ngx_re_gmatch(self.proxy_opts.no_proxy, "\\.?([^,]+)") do + no_proxy_set[host_suffix[1]] = true end -- From curl docs: -- matched as either a domain which contains the hostname, or the -- hostname itself. For example local.com would match local.com, - -- local.com:80, and www.local.com, but not www.notlocal.com. - for pos in host:gmatch("%f[^%z%.]()") do - local host_suffix = host:sub(pos, -1) - if no_proxy_set[host_suffix] then - return nil - end - end + -- local.com:80, and www.local.com, but not www.notlocal.com. 
+ -- + -- Therefore, we keep stripping subdomains from the host, compare + -- them to the ones in the no_proxy list and continue until we find + -- a match or until there's only the TLD left + repeat + if no_proxy_set[host] then + return nil + end + + -- Strip the next level from the domain and check if that one + -- is on the list + host = ngx_re_sub(host, "^[^.]+\\.", "") + until not ngx_re_find(host, "\\.") end if scheme == "http" and self.proxy_opts.http_proxy then diff --git a/t/14-host-header.t b/t/14-host-header.t index 6ea28eb..110a669 100644 --- a/t/14-host-header.t +++ b/t/14-host-header.t @@ -12,7 +12,7 @@ $ENV{TEST_COVERAGE} ||= 0; our $HttpConfig = qq{ lua_package_path "$pwd/lib/?.lua;/usr/local/share/lua/5.1/?.lua;;"; error_log logs/error.log debug; - resolver 8.8.8.8; + resolver 8.8.8.8 ipv6=off; init_by_lua_block { if $ENV{TEST_COVERAGE} == 1 then diff --git a/t/16-http-proxy.t b/t/16-http-proxy.t index bdedded..a0bdf39 100644 --- a/t/16-http-proxy.t +++ b/t/16-http-proxy.t @@ -279,10 +279,8 @@ GET /lua } } --- tcp_listen: 12345 ---- tcp_query eval_stdout -# Note: The incoming request contains CRLF line endings and print needs to -# be used here to get the same line breaks to the expected request -print "CONNECT 127.0.0.1:443 HTTP/1.1\r\nUser-Agent: test_ua\r\nHost: 127.0.0.1:443\r\n\r\n" +--- tcp_query eval +qr/CONNECT 127.0.0.1:443 HTTP\/1.1\r\n.*Host: 127.0.0.1:443\r\n.*/s # The reply cannot be successful or otherwise the client would start # to do a TLS handshake with the proxied host and that we cannot From c39cf8c48718668a52e230a2be3518a2912d203a Mon Sep 17 00:00:00 2001 From: James Hurst Date: Sat, 23 Dec 2017 14:21:55 +0000 Subject: [PATCH 5/5] Version bump --- lib/resty/http.lua | 2 +- lib/resty/http_headers.lua | 2 +- ...sty-http-0.11-0.rockspec => lua-resty-http-0.12-0.rockspec | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) rename lua-resty-http-0.11-0.rockspec => lua-resty-http-0.12-0.rockspec (93%) diff --git 
a/lib/resty/http.lua b/lib/resty/http.lua index 13694bf..b3cce43 100644 --- a/lib/resty/http.lua +++ b/lib/resty/http.lua @@ -100,7 +100,7 @@ end local _M = { - _VERSION = '0.11', + _VERSION = '0.12', } _M._USER_AGENT = "lua-resty-http/" .. _M._VERSION .. " (Lua) ngx_lua/" .. ngx.config.ngx_lua_version diff --git a/lib/resty/http_headers.lua b/lib/resty/http_headers.lua index 6ff26fb..56069ec 100644 --- a/lib/resty/http_headers.lua +++ b/lib/resty/http_headers.lua @@ -4,7 +4,7 @@ local rawget, rawset, setmetatable = local str_lower = string.lower local _M = { - _VERSION = '0.11', + _VERSION = '0.12', } diff --git a/lua-resty-http-0.11-0.rockspec b/lua-resty-http-0.12-0.rockspec similarity index 93% rename from lua-resty-http-0.11-0.rockspec rename to lua-resty-http-0.12-0.rockspec index e2088ac..d727114 100644 --- a/lua-resty-http-0.11-0.rockspec +++ b/lua-resty-http-0.12-0.rockspec @@ -1,8 +1,8 @@ package = "lua-resty-http" -version = "0.11-0" +version = "0.12-0" source = { url = "git://github.com/pintsized/lua-resty-http", - tag = "v0.11" + tag = "v0.12" } description = { summary = "Lua HTTP client cosocket driver for OpenResty / ngx_lua.",