From 2f85e05136a2a92d3fe4affb59892775af1eab5e Mon Sep 17 00:00:00 2001 From: David Precious Date: Fri, 11 Aug 2023 11:13:52 +0100 Subject: [PATCH] Scrub body_data params too (e.g. POSTed JSON) If we have `$c->req->body_data` - for e.g. the request was a POST with a JSON body which Catalyst has decoded into `$c->req->body_data` - then scrub HTML in there too (but applying the same `ignore_params` checks so that you can exempt certain JSON body params from scrubbing). Also moved the ignore_params tests into t/03_params.t, and added the tests for this new feature there too - don't need so many individual test apps, when most features can be tested with a single test app. --- lib/Catalyst/Plugin/HTML/Scrubber.pm | 93 +++++++++++++++++++++------- t/03_params.t | 73 +++++++++++++++++++++- t/05_ignore_params.t | 53 ---------------- t/lib/MyApp03.pm | 11 +++- t/lib/MyApp05.pm | 22 ------- t/lib/MyApp05/Controller/Root.pm | 17 ----- 6 files changed, 150 insertions(+), 119 deletions(-) delete mode 100644 t/05_ignore_params.t delete mode 100644 t/lib/MyApp05.pm delete mode 100644 t/lib/MyApp05/Controller/Root.pm diff --git a/lib/Catalyst/Plugin/HTML/Scrubber.pm b/lib/Catalyst/Plugin/HTML/Scrubber.pm index 8479a68..6dccca1 100644 --- a/lib/Catalyst/Plugin/HTML/Scrubber.pm +++ b/lib/Catalyst/Plugin/HTML/Scrubber.pm @@ -1,5 +1,5 @@ package Catalyst::Plugin::HTML::Scrubber; - +$Catalyst::Plugin::HTML::Scrubber::VERSION = '0.04'; use Moose; use namespace::autoclean; @@ -47,33 +47,80 @@ sub prepare_parameters { sub html_scrub { my ($c, $conf) = @_; - param: - for my $param (keys %{ $c->request->{parameters} }) { - #while (my ($param, $value) = each %{ $c->request->{parameters} }) { - my $value = \$c->request->{parameters}{$param}; - if (ref $$value && ref $$value ne 'ARRAY') { - next param; - } + # If there's body_data - for e.g. a POSTed JSON body that was decoded - + # then we need to walk through it, scrubbing as appropriate + if (my $body_data = $c->request->body_data) { + $c->_scrub_recurse($conf, $c->request->body_data); + } + + # Normal query/POST body parameters: + $c->_scrub_recurse($conf, $c->request->parameters); + +} + +# Recursively scrub param values... +sub _scrub_recurse { + my ($c, $conf, $data) = @_; + + # If the thing we've got is a hashref, walk over its keys, checking + # whether we should ignore, otherwise, do the needful + if (ref $data eq 'HASH') { + for my $key (keys %$data) { + if (!$c->_should_scrub_param($conf, $key)) { + next; + } - # If we only want to operate on certain params, do that checking - # now... - if ($conf && $conf->{ignore_params}) { - my $ignore_params = $c->config->{scrubber}{ignore_params}; - if (ref $ignore_params ne 'ARRAY') { - $ignore_params = [ $ignore_params ]; + # OK, it's fine to fettle with this key - if its value is + # a ref, recurse, otherwise, scrub + if (my $ref = ref $data->{$key}) { + $c->_scrub_recurse($conf, $data->{$key}); + } else { + # Alright, non-ref value, so scrub it + # FIXME why did we have to have this ref-ref handling fun? + #$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value); + $data->{$key} = $c->_scrubber->scrub($data->{$key}); } - for my $ignore_param (@$ignore_params) { - if (ref $ignore_param eq 'Regexp') { - next param if $param =~ $ignore_param; - } else { - next param if $param eq $ignore_param; - } + } + } elsif (ref $data eq 'ARRAY') { + # Simple - scrub all the values + $_ = $c->_scrubber->scrub($_) for @$data; + for (@$data) { + if (ref $_) { + $c->_scrub_recurse($conf, $_); + } else { + $_ = $c->_scrubber->scrub($_); } - } + } + } elsif (ref $data eq 'CODE') { + $c->log->debug("Can't scrub a coderef!"); + } else { + # This shouldn't happen, as we should always start with a ref, + # and non-ref hash/array values should have been handled above. + $c->log->debug("Non-ref to scrub - should this happen?"); + } +} - # If we're still here, we want to scrub this param's value. - $_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value); +sub _should_scrub_param { + my ($c, $conf, $param) = @_; + # If we only want to operate on certain params, do that checking + # now... + if ($conf && $conf->{ignore_params}) { + my $ignore_params = $c->config->{scrubber}{ignore_params}; + if (ref $ignore_params ne 'ARRAY') { + $ignore_params = [ $ignore_params ]; + } + for my $ignore_param (@$ignore_params) { + if (ref $ignore_param eq 'Regexp') { + return if $param =~ $ignore_param; + } else { + return if $param eq $ignore_param; + } + } } + + # If we've not bailed above, we didn't match any ignore_params + # entries, or didn't have any, so we do want to scrub + return 1; } __PACKAGE__->meta->make_immutable; diff --git a/t/03_params.t b/t/03_params.t index af944ce..69b9d79 100644 --- a/t/03_params.t +++ b/t/03_params.t @@ -19,19 +19,86 @@ use Test::More; my $req = POST('/', [foo => 'bar']); my ($res, $c) = ctx_request($req); ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'parameter ok'); + is($c->req->param('foo'), 'bar', 'Normal POST body param, nothing to strip, left alone'); } { my $req = POST('/', [foo => 'bar']); my ($res, $c) = ctx_request($req); ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar'); + is($c->req->param('foo'), 'bar', 'XSS stripped from normal POST body param'); } { + # we allow in the test app config so this should not be stripped my $req = POST('/', [foo => 'bar']); my ($res, $c) = ctx_request($req); ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'parameter ok'); + is($c->req->param('foo'), 'bar', 'Allowed tag not stripped'); +} +{ + diag "HTML left alone in ignored field - by regex match"; + my $value = '

Bar

Foo

'; + my $req = POST('/', [foo_html => $value]); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is( + $c->req->param('foo_html'), + $value, + 'HTML left alone in ignored (by regex) field', + ); +} +{ + diag "HTML left alone in ignored field - by name"; + my $value = '

Bar

Foo

'; + my $req = POST('/', [ignored_param => $value]); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is( + $c->req->param('ignored_param'), + $value, + 'HTML left alone in ignored (by name) field', + ); +} + +{ + # Test that data in a JSON body POSTed gets scrubbed too + my $json_body = <", + "baz":{ + "one":"Second-level " + }, + "arr": [ + "one test ", + "two " + ], + "some_html": "Leave this alone: " +} +JSON + my $req = POST('/', + Content_Type => 'application/json', Content => $json_body + ); + my ($res, $c) = ctx_request($req); + ok($res->code == RC_OK, 'response ok'); + is( + $c->req->body_data->{foo}, + 'Top-level ', # note trailing space where img was removed + 'Top level body param scrubbed', + ); + is( + $c->req->body_data->{baz}{one}, + 'Second-level ', + 'Second level body param scrubbed', + ); + is( + $c->req->body_data->{arr}[0], + 'one test ', + 'Second level array contents scrubbbed', + ); + is( + $c->req->body_data->{some_html}, + 'Leave this alone: ', + 'Body data param matching ignore_params left alone', + ); } done_testing(); diff --git a/t/05_ignore_params.t b/t/05_ignore_params.t deleted file mode 100644 index b28061f..0000000 --- a/t/05_ignore_params.t +++ /dev/null @@ -1,53 +0,0 @@ -use strict; -use warnings; - -use FindBin qw($Bin); -use lib "$Bin/lib"; - -use Catalyst::Test 'MyApp05'; -use HTTP::Request::Common; -use HTTP::Status; -use Test::More; - -{ - diag "Simple request with no params"; - my $req = GET('/'); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($res->content, 'index', 'content ok'); -} -{ - diag "Request wth one param, nothing to strip"; - my $req = POST('/', [foo => 'bar']); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'parameter ok'); -} -{ - diag "Request with XSS attempt gets stripped"; - my $req = POST('/', [foo => 'bar']); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo'), 'bar', 'XSS was stripped'); -} -{ - diag "HTML left alone in ignored field - by regex match"; - my $value = '

Bar

Foo

'; - my $req = POST('/', [foo_html => $value]); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('foo_html'), $value, 'HTML left alone in ignored field'); -} -{ - diag "HTML left alone in ignored field - by name"; - my $value = '

Bar

Foo

'; - my $req = POST('/', [ignored_param => $value]); - my ($res, $c) = ctx_request($req); - ok($res->code == RC_OK, 'response ok'); - is($c->req->param('ignored_param'), $value, 'HTML left alone in ignored field'); -} - - - -done_testing(); - diff --git a/t/lib/MyApp03.pm b/t/lib/MyApp03.pm index 5ba1683..615f94a 100644 --- a/t/lib/MyApp03.pm +++ b/t/lib/MyApp03.pm @@ -9,8 +9,17 @@ extends 'Catalyst'; __PACKAGE__->config( name => 'MyApp03', - scrubber => [allow => [qw/br hr b/],] + scrubber => { + auto => 1, + + ignore_params => [ qr/_html$/, 'ignored_param' ], + + # params for HTML::Scrubber + params => [ + allow => [qw/br hr b/], + ], + } ); __PACKAGE__->setup(); diff --git a/t/lib/MyApp05.pm b/t/lib/MyApp05.pm deleted file mode 100644 index ffac01c..0000000 --- a/t/lib/MyApp05.pm +++ /dev/null @@ -1,22 +0,0 @@ -package MyApp05; - -use Moose; -use namespace::autoclean; - -use Catalyst qw/HTML::Scrubber/; - -extends 'Catalyst'; - -__PACKAGE__->config( - name => 'MyApp03', - scrubber => { - ignore_params => [ - qr/_html$/, - 'ignored_param', - ], - }, -); -__PACKAGE__->setup(); - -1; - diff --git a/t/lib/MyApp05/Controller/Root.pm b/t/lib/MyApp05/Controller/Root.pm deleted file mode 100644 index 2a1feeb..0000000 --- a/t/lib/MyApp05/Controller/Root.pm +++ /dev/null @@ -1,17 +0,0 @@ -package MyApp05::Controller::Root; - -use Moose; -use namespace::autoclean; - -BEGIN { extends 'Catalyst::Controller'; } - -__PACKAGE__->config(namespace => ''); - -sub index : Path : Args(0) { - my ($self, $c) = @_; - - $c->res->body('index'); -} - -1; -