diff --git a/include/scn/detail/regex.h b/include/scn/detail/regex.h index 26e528d4..8841b039 100644 --- a/include/scn/detail/regex.h +++ b/include/scn/detail/regex.h @@ -26,12 +26,8 @@ #if SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_STD #define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 0 -#elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_BOOST -#define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 0 -#elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_RE2 -#define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 1 #else -#error TODO +#define SCN_REGEX_SUPPORTS_NAMED_CAPTURES 1 #endif #if SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_RE2 @@ -44,51 +40,87 @@ namespace scn { SCN_BEGIN_NAMESPACE template - struct basic_regex_matches { - class match { - public: - using char_type = CharT; + class basic_regex_match { + public: + using char_type = CharT; - match(std::basic_string_view str) : m_str(str) {} + basic_regex_match(std::basic_string_view str) : m_str(str) {} #if SCN_REGEX_SUPPORTS_NAMED_CAPTURES - match(std::basic_string_view str, - std::basic_string name) - : m_str(str), m_name(name) - { - } + basic_regex_match(std::basic_string_view str, + std::basic_string name) + : m_str(str), m_name(name) + { + } #endif - std::basic_string_view get() const - { - return m_str; - } + std::basic_string_view get() const + { + return m_str; + } - auto operator*() const - { - return m_str; - } - auto operator->() const - { - return &m_str; - } + auto operator*() const + { + return m_str; + } + auto operator->() const + { + return &m_str; + } #if SCN_REGEX_SUPPORTS_NAMED_CAPTURES - std::optional> name() const - { - return m_name; - } + std::optional> name() const + { + return m_name; + } #endif - private: - std::basic_string_view m_str; + private: + std::basic_string_view m_str; #if SCN_REGEX_SUPPORTS_NAMED_CAPTURES - std::optional> m_name; + std::optional> m_name; #endif - }; + }; - std::vector> matches; + template + class basic_regex_matches + : private std::vector>> { + using base = std::vector>>; + + public: + using match_type = basic_regex_match; + using typename base::const_iterator; + using typename base::const_reverse_iterator; + using typename base::iterator; + using typename base::pointer; + using typename base::reference; + using typename base::reverse_iterator; + using typename base::size_type; + using typename base::value_type; + + using base::base; + + using base::emplace; + using base::emplace_back; + using base::insert; + using base::push_back; + + using base::reserve; + using base::resize; + + using base::at; + using base::operator[]; + + using base::begin; + using base::end; + using base::rbegin; + using base::rend; + + using base::data; + using base::size; + + using base::swap; }; SCN_END_NAMESPACE diff --git a/include/scn/fwd.h b/include/scn/fwd.h index 3eae2374..bb4d8b3d 100644 --- a/include/scn/fwd.h +++ b/include/scn/fwd.h @@ -246,9 +246,14 @@ namespace scn { // detail/regex.h: + template + struct basic_regex_match; template struct basic_regex_matches; + using regex_match = basic_regex_match; + using wregex_match = basic_regex_match; + using regex_matches = basic_regex_matches; using wregex_matches = basic_regex_matches; diff --git a/src/scn/impl/reader/regex_reader.h b/src/scn/impl/reader/regex_reader.h index cdf46007..2a3490c8 100644 --- a/src/scn/impl/reader/regex_reader.h +++ b/src/scn/impl/reader/regex_reader.h @@ -102,12 +102,10 @@ namespace scn { return unexpected_scan_error(scan_error::invalid_scanned_value, "Regular expression didn't match"); } - value.matches.resize(matches.size()); + value.resize(matches.size()); ranges::transform( - matches, value.matches.begin(), - [](auto&& match) - -> std::optional< - typename basic_regex_matches::match> { + matches, value.begin(), + [](auto&& match) -> std::optional> { if (!match.matched) return std::nullopt; return detail::make_string_view_from_pointers(match.first, @@ -116,6 +114,33 @@ namespace scn { return input.begin() + ranges::distance(input.data(), matches[0].second); #elif SCN_REGEX_BACKEND == SCN_REGEX_BACKEND_BOOST + std::vector> names; + for (size_t i = 0; i < pattern.size();) { + if constexpr (std::is_same_v) { + i = pattern.find("(?<", i); + } + else { + i = pattern.find(L"(?<", i); + } + + if (i == std::basic_string_view::npos) { + break; + } + if (i > 0 && pattern[i - 1] == CharT{'\\'}) { + if (i == 1 || pattern[i - 2] != CharT{'\\'}) { + i += 3; + continue; + } + } + + i += 3; + auto end_i = pattern.find(CharT{'>'}, i); + if (end_i == std::basic_string_view::npos) { + break; + } + names.emplace_back(pattern.substr(i, end_i - i)); + } + auto re = boost::basic_regex{pattern.data(), pattern.size(), boost::regex_constants::normal}; boost::match_results matches{}; @@ -126,16 +151,25 @@ namespace scn { return unexpected_scan_error(scan_error::invalid_scanned_value, "Regular expression didn't match"); } - value.matches.resize(matches.size()); + + value.resize(matches.size()); ranges::transform( - matches, value.matches.begin(), - [](auto&& match) - -> std::optional< - typename basic_regex_matches::match> { + matches, value.begin(), + [&](auto&& match) -> std::optional> { if (!match.matched) return std::nullopt; - return detail::make_string_view_from_pointers(match.first, - match.second); + auto sv = detail::make_string_view_from_pointers( + match.first, match.second); + + if (auto name_it = ranges::find_if(names, + [&](const auto& name) { + return match == + matches[name]; + }); + name_it != names.end()) { + return basic_regex_match{sv, *name_it}; + } + return sv; }); return input.begin() + ranges::distance(input.data(), matches[0].second); @@ -163,18 +197,25 @@ namespace scn { return unexpected_scan_error(scan_error::invalid_scanned_value, "Regular expression didn't match"); } - value.matches.resize(matches.size() + 1); - value.matches[0] = detail::make_string_view_from_pointers( - input.data(), new_input.data()); - ranges::transform( - matches, value.matches.begin() + 1, - [](auto&& match) - -> std::optional< - typename basic_regex_matches::match> { - if (!match) - return std::nullopt; - return *match; - }); + value.resize(matches.size() + 1); + value[0] = detail::make_string_view_from_pointers(input.data(), + new_input.data()); + ranges::transform(matches, value.begin() + 1, + [&](auto&& match) -> std::optional { + if (!match) + return std::nullopt; + return *match; + }); + { + const auto& capturing_groups = re.CapturingGroupNames(); + for (size_t i = 1; i < value.size(); ++i) { + if (auto it = capturing_groups.find(static_cast(i)); + it != capturing_groups.end()) { + auto val = value[i]->get(); + value[i].emplace(val, it->second); + }; + } + } return input.begin() + ranges::distance(input.data(), new_input.data()); #else diff --git a/tests/unittests/regex_test.cpp b/tests/unittests/regex_test.cpp index 4c25a7bb..517bf736 100644 --- a/tests/unittests/regex_test.cpp +++ b/tests/unittests/regex_test.cpp @@ -32,8 +32,7 @@ TEST(RegexTest, String) TEST(RegexTest, StringView) { - auto r = - scn::scan("foobar123", "{:/([a-zA-Z]+)/}"); + auto r = scn::scan("foobar123", "{:/([a-zA-Z]+)/}"); ASSERT_TRUE(r); EXPECT_FALSE(r->range().empty()); EXPECT_EQ(r->value(), "foobar"); @@ -45,12 +44,43 @@ TEST(RegexTest, Matches) scn::scan("foobar123", "{:/([a-zA-Z]+)([0-9]+)/}"); ASSERT_TRUE(r); EXPECT_TRUE(r->range().empty()); - EXPECT_THAT(r->value().matches, - testing::ElementsAre( - testing::Optional(testing::Property( - &scn::regex_matches::match::get, "foobar123"sv)), - testing::Optional(testing::Property( - &scn::regex_matches::match::get, "foobar"sv)), - testing::Optional(testing::Property( - &scn::regex_matches::match::get, "123"sv)))); + EXPECT_THAT(r->value(), testing::ElementsAre( + testing::Optional(testing::Property( + &scn::regex_match::get, "foobar123"sv)), + testing::Optional(testing::Property( + &scn::regex_match::get, "foobar"sv)), + testing::Optional(testing::Property( + &scn::regex_match::get, "123"sv)))); +} + +#if SCN_REGEX_SUPPORTS_NAMED_CAPTURES +TEST(RegexTest, NamedString) +{ + auto r = scn::scan("foobar123", + "{:/(?[a-zA-Z]+)([0-9]+)/}"); + ASSERT_TRUE(r); + EXPECT_TRUE(r->range().empty()); + EXPECT_EQ(r->value(), "foobar123"); +} + +TEST(RegexTest, NamedMatches) +{ + auto r = scn::scan("foobar123", + "{:/(?[a-zA-Z]+)([0-9]+)/}"); + ASSERT_TRUE(r); + EXPECT_TRUE(r->range().empty()); + + ASSERT_TRUE(r->value()[0]); + EXPECT_EQ(r->value()[0]->get(), "foobar123"); + EXPECT_FALSE(r->value()[0]->name()); + + ASSERT_TRUE(r->value()[1]); + EXPECT_EQ(r->value()[1]->get(), "foobar"); + ASSERT_TRUE(r->value()[1]->name()); + EXPECT_EQ(*r->value()[1]->name(), "prefix"); + + ASSERT_TRUE(r->value()[2]); + EXPECT_EQ(r->value()[2]->get(), "123"); + EXPECT_FALSE(r->value()[2]->name()); } +#endif