Skip to content

Commit

Permalink
Named regex captures
Browse files Browse the repository at this point in the history
  • Loading branch information
eliaskosunen committed Dec 1, 2023
1 parent 64ec7d7 commit 73e43c5
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 70 deletions.
104 changes: 68 additions & 36 deletions include/scn/detail/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,8 @@

#if SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_STD
#define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 0
#elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_BOOST
#define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 0
#elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_RE2
#define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 1
#else
#error TODO
#define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 1
#endif

#if SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_RE2
Expand All @@ -44,51 +40,87 @@ namespace scn {
SCN_BEGIN_NAMESPACE

template <typename CharT>
struct basic_regex_matches {
class match {
public:
using char_type = CharT;
class basic_regex_match {
public:
using char_type = CharT;

match(std::basic_string_view<CharT> str) : m_str(str) {}
basic_regex_match(std::basic_string_view<CharT> str) : m_str(str) {}

#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
match(std::basic_string_view<CharT> str,
std::basic_string<CharT> name)
: m_str(str), m_name(name)
{
}
basic_regex_match(std::basic_string_view<CharT> str,
std::basic_string<CharT> name)
: m_str(str), m_name(name)
{
}
#endif

std::basic_string_view<CharT> get() const
{
return m_str;
}
std::basic_string_view<CharT> get() const
{
return m_str;
}

auto operator*() const
{
return m_str;
}
auto operator->() const
{
return &m_str;
}
auto operator*() const
{
return m_str;
}
auto operator->() const
{
return &m_str;
}

#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
std::optional<std::basic_string_view<CharT>> name() const
{
return m_name;
}
std::optional<std::basic_string_view<CharT>> name() const
{
return m_name;
}
#endif

private:
std::basic_string_view<CharT> m_str;
private:
std::basic_string_view<CharT> m_str;

#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
std::optional<std::basic_string<CharT>> m_name;
std::optional<std::basic_string<CharT>> m_name;
#endif
};
};

std::vector<std::optional<match>> matches;
/**
 * Ordered collection of the capture groups produced by a regex scan.
 *
 * Every element is a `std::optional<basic_regex_match<CharT>>`: a group that
 * did not participate in the match is stored as an empty optional.
 * Element 0 is the whole match, elements 1..N are the capture groups.
 *
 * The class privately inherits from `std::vector` and re-exports a selected
 * part of its interface, so it can be used like a sequence container
 * without being convertible to the underlying vector type.
 */
template <typename CharT>
class basic_regex_matches
    : private std::vector<std::optional<basic_regex_match<CharT>>> {
    using base = std::vector<std::optional<basic_regex_match<CharT>>>;

public:
    using match_type = basic_regex_match<CharT>;

    // Container member types
    using typename base::const_iterator;
    using typename base::const_pointer;
    using typename base::const_reference;
    using typename base::const_reverse_iterator;
    using typename base::difference_type;
    using typename base::iterator;
    using typename base::pointer;
    using typename base::reference;
    using typename base::reverse_iterator;
    using typename base::size_type;
    using typename base::value_type;

    // Inherit all std::vector constructors
    using base::base;

    // Modifiers
    using base::clear;
    using base::emplace;
    using base::emplace_back;
    using base::insert;
    using base::push_back;

    using base::reserve;
    using base::resize;

    // Element access
    using base::at;
    using base::operator[];
    using base::back;
    using base::front;

    // Iteration
    using base::begin;
    using base::cbegin;
    using base::cend;
    using base::crbegin;
    using base::crend;
    using base::end;
    using base::rbegin;
    using base::rend;

    // Size and storage
    using base::data;
    using base::empty;
    using base::size;

    using base::swap;
};

SCN_END_NAMESPACE
Expand Down
5 changes: 5 additions & 0 deletions include/scn/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,14 @@ namespace scn {

// detail/regex.h:

// Forward declarations of the regex result types.
// The class-key has to agree with the definitions in detail/regex.h, which
// use `class`: a struct/class mismatch is diagnosed by Clang
// (-Wmismatched-tags) and MSVC (C4099).
template <typename CharT>
class basic_regex_match;
template <typename CharT>
class basic_regex_matches;

// A single capture group, for narrow and wide character input
using regex_match = basic_regex_match<char>;
using wregex_match = basic_regex_match<wchar_t>;

// The full list of capture groups, for narrow and wide character input
using regex_matches = basic_regex_matches<char>;
using wregex_matches = basic_regex_matches<wchar_t>;

Expand Down
89 changes: 65 additions & 24 deletions src/scn/impl/reader/regex_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,10 @@ namespace scn {
return unexpected_scan_error(scan_error::invalid_scanned_value,
"Regular expression didn't match");
}
value.matches.resize(matches.size());
value.resize(matches.size());
ranges::transform(
matches, value.matches.begin(),
[](auto&& match)
-> std::optional<
typename basic_regex_matches<CharT>::match> {
matches, value.begin(),
[](auto&& match) -> std::optional<basic_regex_match<CharT>> {
if (!match.matched)
return std::nullopt;
return detail::make_string_view_from_pointers(match.first,
Expand All @@ -116,6 +114,33 @@ namespace scn {
return input.begin() +
ranges::distance(input.data(), matches[0].second);
#elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_BOOST
// Collect the names of all named capture groups (`(?<name>...)`) by
// scanning the pattern text, so that each name can be looked up in the
// match results afterwards.
std::vector<std::basic_string<CharT>> names;
for (size_t i = 0; i < pattern.size();) {
    if constexpr (std::is_same_v<CharT, char>) {
        i = pattern.find("(?<", i);
    }
    else {
        i = pattern.find(L"(?<", i);
    }

    if (i == std::basic_string_view<CharT>::npos) {
        break;
    }
    // `\(?<` is an escaped, literal parenthesis and opens no group,
    // unless the backslash is itself escaped (`\\(?<`)
    if (i > 0 && pattern[i - 1] == CharT{'\\'}) {
        if (i == 1 || pattern[i - 2] != CharT{'\\'}) {
            i += 3;
            continue;
        }
    }

    // Step over the "(?<" prefix
    i += 3;

    // `(?<=...)` and `(?<!...)` are lookbehind assertions, not named
    // groups: they must not contribute a (bogus) name
    if (i < pattern.size() &&
        (pattern[i] == CharT{'='} || pattern[i] == CharT{'!'})) {
        continue;
    }

    auto end_i = pattern.find(CharT{'>'}, i);
    if (end_i == std::basic_string_view<CharT>::npos) {
        // Unterminated group name: nothing more to collect
        break;
    }
    names.emplace_back(pattern.substr(i, end_i - i));
}

auto re = boost::basic_regex<CharT>{pattern.data(), pattern.size(),
boost::regex_constants::normal};
boost::match_results<const CharT*> matches{};
Expand All @@ -126,16 +151,25 @@ namespace scn {
return unexpected_scan_error(scan_error::invalid_scanned_value,
"Regular expression didn't match");
}
value.matches.resize(matches.size());

value.resize(matches.size());
ranges::transform(
matches, value.matches.begin(),
[](auto&& match)
-> std::optional<
typename basic_regex_matches<CharT>::match> {
matches, value.begin(),
[&](auto&& match) -> std::optional<basic_regex_match<CharT>> {
if (!match.matched)
return std::nullopt;
return detail::make_string_view_from_pointers(match.first,
match.second);
auto sv = detail::make_string_view_from_pointers(
match.first, match.second);

if (auto name_it = ranges::find_if(names,
[&](const auto& name) {
return match ==
matches[name];
});
name_it != names.end()) {
return basic_regex_match<CharT>{sv, *name_it};
}
return sv;
});
return input.begin() +
ranges::distance(input.data(), matches[0].second);
Expand Down Expand Up @@ -163,18 +197,25 @@ namespace scn {
return unexpected_scan_error(scan_error::invalid_scanned_value,
"Regular expression didn't match");
}
value.matches.resize(matches.size() + 1);
value.matches[0] = detail::make_string_view_from_pointers(
input.data(), new_input.data());
ranges::transform(
matches, value.matches.begin() + 1,
[](auto&& match)
-> std::optional<
typename basic_regex_matches<CharT>::match> {
if (!match)
return std::nullopt;
return *match;
});
value.resize(matches.size() + 1);
value[0] = detail::make_string_view_from_pointers(input.data(),
new_input.data());
ranges::transform(matches, value.begin() + 1,
[&](auto&& match) -> std::optional<regex_match> {
if (!match)
return std::nullopt;
return *match;
});
{
    // Attach the group name to every capture that has one.
    // `capturing_groups` is keyed by group index; index 0 (the whole
    // match) is skipped.
    const auto& capturing_groups = re.CapturingGroupNames();
    for (size_t i = 1; i < value.size(); ++i) {
        if (!value[i]) {
            // The group did not participate in the match, so there is
            // no match object to name (and nothing safe to dereference)
            continue;
        }
        if (auto it = capturing_groups.find(static_cast<int>(i));
            it != capturing_groups.end()) {
            // Copy the view out first: emplace() replaces the match
            // object it was read from
            const auto val = value[i]->get();
            value[i].emplace(val, it->second);
        }
    }
}
return input.begin() +
ranges::distance(input.data(), new_input.data());
#else
Expand Down
50 changes: 40 additions & 10 deletions tests/unittests/regex_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ TEST(RegexTest, String)

TEST(RegexTest, StringView)
{
auto r =
scn::scan<std::string_view>("foobar123", "{:/([a-zA-Z]+)/}");
auto r = scn::scan<std::string_view>("foobar123", "{:/([a-zA-Z]+)/}");
ASSERT_TRUE(r);
EXPECT_FALSE(r->range().empty());
EXPECT_EQ(r->value(), "foobar");
Expand All @@ -45,12 +44,43 @@ TEST(RegexTest, Matches)
scn::scan<scn::regex_matches>("foobar123", "{:/([a-zA-Z]+)([0-9]+)/}");
ASSERT_TRUE(r);
EXPECT_TRUE(r->range().empty());
EXPECT_THAT(r->value().matches,
testing::ElementsAre(
testing::Optional(testing::Property(
&scn::regex_matches::match::get, "foobar123"sv)),
testing::Optional(testing::Property(
&scn::regex_matches::match::get, "foobar"sv)),
testing::Optional(testing::Property(
&scn::regex_matches::match::get, "123"sv))));
EXPECT_THAT(r->value(), testing::ElementsAre(
testing::Optional(testing::Property(
&scn::regex_match::get, "foobar123"sv)),
testing::Optional(testing::Property(
&scn::regex_match::get, "foobar"sv)),
testing::Optional(testing::Property(
&scn::regex_match::get, "123"sv))));
}

#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES
// Scanning a plain string with a pattern that contains a named group:
// the whole match is expected as the value, and all input is consumed.
TEST(RegexTest, NamedString)
{
    auto result = scn::scan<std::string>(
        "foobar123", "{:/(?<prefix>[a-zA-Z]+)([0-9]+)/}");

    ASSERT_TRUE(result);
    EXPECT_TRUE(result->range().empty());
    EXPECT_EQ(result->value(), "foobar123");
}

// Scanning into regex_matches with one named and one unnamed group:
// only the named group is expected to report a name.
TEST(RegexTest, NamedMatches)
{
    auto result = scn::scan<scn::regex_matches>(
        "foobar123", "{:/(?<prefix>[a-zA-Z]+)([0-9]+)/}");

    ASSERT_TRUE(result);
    EXPECT_TRUE(result->range().empty());

    const auto& groups = result->value();

    // Group 0: the whole match, unnamed
    ASSERT_TRUE(groups[0]);
    EXPECT_EQ(groups[0]->get(), "foobar123");
    EXPECT_FALSE(groups[0]->name());

    // Group 1: the named group "prefix"
    ASSERT_TRUE(groups[1]);
    EXPECT_EQ(groups[1]->get(), "foobar");
    ASSERT_TRUE(groups[1]->name());
    EXPECT_EQ(*groups[1]->name(), "prefix");

    // Group 2: the unnamed digit group
    ASSERT_TRUE(groups[2]);
    EXPECT_EQ(groups[2]->get(), "123");
    EXPECT_FALSE(groups[2]->name());
}
#endif

0 comments on commit 73e43c5

Please sign in to comment.