From 01ce9e0351ba01912ac1be8b07936ec7c9d28dcc Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Mon, 5 Aug 2024 03:34:47 -0700 Subject: [PATCH] Switch over to RE2 for waivers. --- .github/bin/check-potential-problems.sh | 9 ++++ common/analysis/BUILD | 7 +-- common/analysis/lint_waiver.cc | 61 +++++++++++++++---------- common/analysis/lint_waiver.h | 18 +++++--- common/analysis/lint_waiver_test.cc | 6 ++- 5 files changed, 64 insertions(+), 37 deletions(-) diff --git a/.github/bin/check-potential-problems.sh b/.github/bin/check-potential-problems.sh index cd8abf0d3..c30676da9 100755 --- a/.github/bin/check-potential-problems.sh +++ b/.github/bin/check-potential-problems.sh @@ -83,4 +83,13 @@ if [ $? -eq 0 ]; then EXIT_CODE=1 fi +# Never use std::regex. +find common verilog -name "*.h" -o -name "*.cc" | \ + xargs grep -n '#include ' +if [ $? -eq 0 ]; then + echo "::error:: Don't use stdlib regex, it is slow and requires exceptions. Use RE2 instead (https://github.com/google/re2; header #include \"re2/re2.h\")." + echo + EXIT_CODE=1 +fi + exit "${EXIT_CODE}" diff --git a/common/analysis/BUILD b/common/analysis/BUILD index c6a7ff9be..05ff2817c 100644 --- a/common/analysis/BUILD +++ b/common/analysis/BUILD @@ -121,11 +121,6 @@ cc_library( name = "lint-waiver", srcs = ["lint_waiver.cc"], hdrs = ["lint_waiver.h"], - copts = select({ - "@platforms//os:windows": [], - "//conditions:default": ["-fexceptions"], - }), - features = ["-use_header_modules"], # precompiled headers incompatible with -fexceptions. deps = [ ":command-file-lexer", "//common/strings:comment-utils", @@ -139,10 +134,12 @@ cc_library( "//common/util:file-util", "//common/util:iterator-range", "//common/util:logging", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view", + "@com_googlesource_code_re2//:re2", ], ) diff --git a/common/analysis/lint_waiver.cc b/common/analysis/lint_waiver.cc index 540a8808a..d3ce01a33 100644 --- a/common/analysis/lint_waiver.cc +++ b/common/analysis/lint_waiver.cc @@ -19,7 +19,7 @@ #include #include #include -#include // NOLINT +#include #include #include #include @@ -43,6 +43,7 @@ #include "common/util/file_util.h" #include "common/util/iterator_range.h" #include "common/util/logging.h" +#include "re2/re2.h" namespace verible { @@ -56,26 +57,43 @@ void LintWaiver::WaiveLineRange(absl::string_view rule_name, int line_begin, line_set.Add({line_begin, line_end}); } -void LintWaiver::WaiveWithRegex(absl::string_view rule_name, - const std::string ®ex_str) { - RegexVector ®ex_vector = waiver_re_map_[rule_name]; - auto regex_iter = regex_cache_.find(regex_str); +const RE2 *LintWaiver::GetOrCreateCachedRegex(absl::string_view regex_str) { + auto found = regex_cache_.find(regex_str); + if (found != regex_cache_.end()) { + return found->second.get(); + } + + auto inserted = regex_cache_.emplace( + regex_str, std::make_unique(regex_str, re2::RE2::Quiet)); + return inserted.first->second.get(); +} - if (regex_iter == regex_cache_.end()) { - regex_cache_[regex_str] = std::regex(regex_str); +absl::Status LintWaiver::WaiveWithRegex(absl::string_view rule_name, + absl::string_view regex_str) { + const std::string regex_as_group = absl::StrCat("(", regex_str, ")"); + const RE2 *regex = GetOrCreateCachedRegex(regex_as_group); + if (!regex->ok()) { + return absl::InvalidArgumentError( + absl::StrCat("Invalid regex: ", regex->error())); } - regex_vector.push_back(®ex_cache_[regex_str]); + waiver_re_map_[rule_name].push_back(regex); + return absl::OkStatus(); } void LintWaiver::RegexToLines(absl::string_view contents, const LineColumnMap &line_map) { for (const auto &rule : waiver_re_map_) { - for (const auto *re : rule.second) { - for (std::cregex_iterator i(contents.begin(), contents.end(), *re); - i != std::cregex_iterator(); i++) { - const std::cmatch &match = *i; - WaiveOneLine(rule.first, line_map.LineAtOffset(match.position())); + for (const RE2 *re : rule.second) { + absl::string_view walk = contents; + absl::string_view match; + while (RE2::FindAndConsume(&walk, *re, &match)) { + const size_t pos = match.begin() - contents.begin(); + WaiveOneLine(rule.first, line_map.LineAtOffset(pos)); + if (match.empty()) { + if (match.end() == contents.end()) break; + walk = contents.substr(pos + 1); + } } } } @@ -338,15 +356,12 @@ static absl::Status WaiveCommandHandler( } if (option == "location") { - const std::string file_match_regex(val); - try { - const std::regex file_matcher(file_match_regex); - location_match = - std::regex_search(std::string(lintee_filename), file_matcher); - } catch (const std::regex_error &e) { + const RE2 file_match_regex(val); + if (!file_match_regex.ok()) { return WaiveCommandError(token_pos, waive_file, "--location regex is invalid"); } + location_match = RE2::PartialMatch(lintee_filename, file_match_regex); continue; } @@ -369,13 +384,9 @@ static absl::Status WaiveCommandHandler( } if (can_use_regex) { - try { - waiver->WaiveWithRegex(rule, regex); - } catch (const std::regex_error &e) { - const char *reason = e.what(); - + if (auto status = waiver->WaiveWithRegex(rule, regex); !status.ok()) { return WaiveCommandError(regex_token_pos, waive_file, - "Invalid regex: ", reason); + status.message()); } } diff --git a/common/analysis/lint_waiver.h b/common/analysis/lint_waiver.h index 8b2a07b49..f097d8ed8 100644 --- a/common/analysis/lint_waiver.h +++ b/common/analysis/lint_waiver.h @@ -16,12 +16,13 @@ #define VERIBLE_COMMON_ANALYSIS_LINT_WAIVER_H_ #include -#include // NOLINT +#include #include #include #include #include +#include "absl/container/flat_hash_map.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "common/strings/line_column_map.h" @@ -29,13 +30,15 @@ #include "common/text/text_structure.h" #include "common/text/token_stream_view.h" #include "common/util/container_util.h" +#include "re2/re2.h" namespace verible { // LintWaiver maintains a set of line ranges per lint rule that should be // exempt from each rule. class LintWaiver { - using RegexVector = std::vector; + // Vector of un-owned regexes. + using RegexVector = std::vector; public: LintWaiver() = default; @@ -58,7 +61,8 @@ class LintWaiver { int line_end); // Adds a regular expression which will be used to apply a waiver. - void WaiveWithRegex(absl::string_view rule_name, const std::string ®ex); + absl::Status WaiveWithRegex(absl::string_view rule_name, + absl::string_view regex); // Converts the prepared regular expressions to line numbers and applies the // waivers. @@ -82,14 +86,16 @@ class LintWaiver { } private: + const RE2 *GetOrCreateCachedRegex(absl::string_view regex_str); + // Keys in the maps below are the names of the waived rules. They can be // string_view because the static strings for each lint rule class exist, // and will outlive all LintWaiver objects. This applies to both waiver_map_ // and waiver_re_map_. - std::map waiver_map_; - std::map waiver_re_map_; + absl::flat_hash_map waiver_map_; + absl::flat_hash_map waiver_re_map_; - std::map regex_cache_; + absl::flat_hash_map> regex_cache_; }; // LintWaiverBuilder is a language-agnostic helper class for constructing diff --git a/common/analysis/lint_waiver_test.cc b/common/analysis/lint_waiver_test.cc index fe1e28852..40b6b5670 100644 --- a/common/analysis/lint_waiver_test.cc +++ b/common/analysis/lint_waiver_test.cc @@ -792,7 +792,7 @@ TEST_F(LintWaiverBuilderTest, RegexToLinesCatchAll) { const absl::string_view cfg_regex = "waive --rule=rule-1 --regex=\".*\""; EXPECT_OK(ApplyExternalWaivers(active_rules, user_file, cfg_file, cfg_regex)); - const absl::string_view file = "abc\ndef\nghi\n"; + const absl::string_view file = "abc\ndef\nghi\n\n"; const LineColumnMap line_map(file); lint_waiver_.RegexToLines(file, line_map); @@ -801,6 +801,10 @@ TEST_F(LintWaiverBuilderTest, RegexToLinesCatchAll) { EXPECT_TRUE(lint_waiver_.RuleIsWaivedOnLine("rule-1", 0)); EXPECT_TRUE(lint_waiver_.RuleIsWaivedOnLine("rule-1", 1)); EXPECT_TRUE(lint_waiver_.RuleIsWaivedOnLine("rule-1", 2)); + EXPECT_TRUE(lint_waiver_.RuleIsWaivedOnLine("rule-1", 3)); + EXPECT_TRUE(lint_waiver_.RuleIsWaivedOnLine("rule-1", 4)); + + EXPECT_FALSE(lint_waiver_.RuleIsWaivedOnLine("rule-1", 5)); // non-exist line } TEST_F(LintWaiverBuilderTest, RegexToLinesMultipleMatches) {