Skip to content

Commit

Permalink
Don't assume string_view::iterator is const char*
Browse files Browse the repository at this point in the history
There were a few assumptions in the code that these types are equivalent,
but they are not on all platforms.

The fixes sometimes involve ugly `&*some_iterator` constructs to get to
the underlying location. Some of these should be re-considered after
switching to c++20.

We also have a `string_view_null_iterator()` function to generate
an iterator that is not initialized to be used in place where we
previously used comparison against nullptr.

Issues: #2323
  • Loading branch information
hzeller committed Jan 14, 2025
1 parent 26188e7 commit ecc7bdc
Show file tree
Hide file tree
Showing 30 changed files with 140 additions and 125 deletions.
6 changes: 0 additions & 6 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ bazel_dep(name = "rules_bison", version = "0.3")
bazel_dep(name = "googletest", version = "1.14.0.bcr.1", dev_dependency = True)

bazel_dep(name = "abseil-cpp", version = "20240116.2")
single_version_override(
module_name = "abseil-cpp",
patch_strip = 1,
patches = ["//bazel:absl.patch"],
version = "20240116.2",
)

# Last protobuf version working with windows without strange linking errors.
# This also means that we unfortunately can't use the @protobuf//bazel rules
Expand Down
18 changes: 0 additions & 18 deletions bazel/absl.patch

This file was deleted.

8 changes: 4 additions & 4 deletions verible/common/analysis/lint-rule-status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ std::string AutoFix::Apply(absl::string_view base) const {
std::string result;
auto prev_start = base.cbegin();
for (const auto &edit : edits_) {
CHECK_LE(base.cbegin(), edit.fragment.cbegin());
CHECK_GE(base.cend(), edit.fragment.cend());
CHECK(base.cbegin() <= edit.fragment.cbegin());
CHECK(base.cend() >= edit.fragment.cend());

const absl::string_view text_before(
prev_start, std::distance(prev_start, edit.fragment.cbegin()));
&*prev_start, std::distance(prev_start, edit.fragment.cbegin()));
absl::StrAppend(&result, text_before, edit.replacement);

prev_start = edit.fragment.cend();
}
const absl::string_view text_after(prev_start,
const absl::string_view text_after(&*prev_start,
std::distance(prev_start, base.cend()));
return absl::StrCat(result, text_after);
}
Expand Down
4 changes: 2 additions & 2 deletions verible/common/formatting/align.h
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,8 @@ ColumnPositionTree ScanPartitionForAlignmentCells_WithNonTreeTokens(

// Collect tokens excluded from SyntaxTree (delimiters and comments)
CHECK(!ftokens.empty());
CHECK_LE(ftokens.front().Text().begin(), first_tree_token.text().begin());
CHECK_GE(ftokens.back().Text().end(), last_tree_token.text().end());
CHECK(ftokens.front().Text().begin() <= first_tree_token.text().begin());
CHECK(ftokens.back().Text().end() >= last_tree_token.text().end());

FormatTokenRange::const_iterator ftoken_it = ftokens.begin();
// Find leading non-tree tokens range end
Expand Down
30 changes: 17 additions & 13 deletions verible/common/formatting/align_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class AlignmentTestFixture : public ::testing::Test,
public UnwrappedLineMemoryHandler {
public:
explicit AlignmentTestFixture(absl::string_view text)
: sample_(text),
: sample_backing_(text),
sample_(sample_backing_),
tokens_(absl::StrSplit(sample_, absl::ByAnyChar(" \n"),
absl::SkipEmpty())) {
for (const auto token : tokens_) {
Expand All @@ -80,7 +81,8 @@ class AlignmentTestFixture : public ::testing::Test,
}

protected:
const std::string sample_;
const std::string sample_backing_;
const absl::string_view sample_;
const std::vector<absl::string_view> tokens_;
std::vector<TokenInfo> ftokens_;
};
Expand Down Expand Up @@ -271,7 +273,7 @@ TEST_F(Sparse3x3MatrixAlignmentTest, AlignmentPolicyFlushLeft) {

TEST_F(Sparse3x3MatrixAlignmentTest, AlignmentPolicyPreserve) {
// Set previous-token string pointers to preserve spaces.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

TabularAlignTokens(40, sample_, ByteOffsetSet(), kPreserveAlignmentHandler,
Expand Down Expand Up @@ -391,7 +393,7 @@ TEST_F(Sparse3x3MatrixAlignmentTest, IgnoreCommentLine) {

TEST_F(Sparse3x3MatrixAlignmentTest, CompletelyDisabledNoAlignment) {
// Disabled ranges use original spacing
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand All @@ -413,7 +415,7 @@ TEST_F(Sparse3x3MatrixAlignmentTest, CompletelyDisabledNoAlignment) {

TEST_F(Sparse3x3MatrixAlignmentTest, CompletelyDisabledNoAlignmentWithIndent) {
// Disabled ranges use original spacing
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -445,7 +447,7 @@ class Sparse3x3MatrixAlignmentMoreSpacesTest
Sparse3x3MatrixAlignmentMoreSpacesTest()
: Sparse3x3MatrixAlignmentTest("one two\nthree four\nfive six") {
// This is needed for preservation of original spacing.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);
}
};
Expand Down Expand Up @@ -480,7 +482,7 @@ TEST_F(Sparse3x3MatrixAlignmentMoreSpacesTest,

TEST_F(Sparse3x3MatrixAlignmentTest, PartiallyDisabledNoAlignment) {
// Disabled ranges use original spacing
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -865,7 +867,7 @@ class Dense2x2MatrixAlignmentTest : public MatrixTreeAlignmentTestFixture {
CHECK_EQ(tokens_.size(), 4);

// Need to know original spacing to be able to infer user-intent.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -1170,7 +1172,7 @@ TEST_F(SubcolumnsTreeAlignmentTest, AlignmentPolicyFlushLeft) {
}

TEST_F(SubcolumnsTreeAlignmentTest, AlignmentPolicyPreserve) {
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

TabularAlignTokens(40, sample_, ByteOffsetSet(),
Expand Down Expand Up @@ -1332,7 +1334,7 @@ class InferSubcolumnsTreeAlignmentTest : public SubcolumnsTreeAlignmentTest {
"( eleven nineteen-ninety-nine 2k )\n")
: SubcolumnsTreeAlignmentTest(text) {
// Need to know original spacing to be able to infer user-intent.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -1497,15 +1499,16 @@ class FormatUsingOriginalSpacingTest : public ::testing::Test,
"\n <1NL+7Spaces>\n <1nl+7spaces>"
"\n\n <2NL+2Spaces>\n\n <2nl+2spaces>"
"\n \n\n <1NL+1Space+2NL+2Spaces>\n \n\n <1nl+1space+2nl+2spaces>")
: sample_(text),
: sample_backing_(text),
sample_(sample_backing_),
tokens_(absl::StrSplit(sample_, OutsideCharPairs('<', '>'),
absl::SkipEmpty())) {
for (const auto token : tokens_) {
ftokens_.emplace_back(1, token);
}
// sample_ is the memory-owning string buffer
CreateTokenInfosExternalStringBuffer(ftokens_);
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);
}

Expand All @@ -1518,7 +1521,8 @@ class FormatUsingOriginalSpacingTest : public ::testing::Test,
EXPECT_PRED_FORMAT2(TokenPartitionTreesEqualPredFormat, nodes[0], expected);
}

const std::string sample_;
const std::string sample_backing_;
const absl::string_view sample_;
const std::vector<absl::string_view> tokens_;
std::vector<TokenInfo> ftokens_;
};
Expand Down
22 changes: 12 additions & 10 deletions verible/common/formatting/format-token.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ std::ostream &operator<<(std::ostream &stream, const InterTokenInfo &t) {
stream << "{\n spaces_required: " << t.spaces_required
<< "\n break_penalty: " << t.break_penalty
<< "\n break_decision: " << t.break_decision
<< "\n preserve_space?: " << (t.preserved_space_start != nullptr)
<< "\n}";
<< "\n preserve_space?: "
<< (t.preserved_space_start != string_view_null_iterator()) << "\n}";
return stream;
}

Expand Down Expand Up @@ -143,9 +143,10 @@ InterTokenDecision::InterTokenDecision(const InterTokenInfo &info)
action(ConvertSpacing(info.break_decision)),
preserved_space_start(info.preserved_space_start) {}

static absl::string_view OriginalLeadingSpacesRange(const char *begin,
const char *end) {
if (begin == nullptr) {
static absl::string_view OriginalLeadingSpacesRange(
absl::string_view::const_iterator begin,
absl::string_view::const_iterator end) {
if (begin == string_view_null_iterator()) {
VLOG(4) << "no original space range";
return make_string_view_range(end, end); // empty range
}
Expand All @@ -163,7 +164,7 @@ absl::string_view FormattedToken::OriginalLeadingSpaces() const {
std::ostream &FormattedToken::FormattedText(std::ostream &stream) const {
switch (before.action) {
case SpacingDecision::kPreserve: {
if (before.preserved_space_start != nullptr) {
if (before.preserved_space_start != string_view_null_iterator()) {
// Calculate string_view range of pre-existing spaces, and print that.
stream << OriginalLeadingSpaces();
} else {
Expand Down Expand Up @@ -196,15 +197,15 @@ absl::string_view PreFormatToken::OriginalLeadingSpaces() const {

size_t PreFormatToken::LeadingSpacesLength() const {
if (before.break_decision == SpacingOptions::kPreserve &&
before.preserved_space_start != nullptr) {
before.preserved_space_start != string_view_null_iterator()) {
return OriginalLeadingSpaces().length();
}
// in other cases (append, wrap), take the spaces_required value.
return before.spaces_required;
}

int PreFormatToken::ExcessSpaces() const {
if (before.preserved_space_start == nullptr) return 0;
if (before.preserved_space_start == string_view_null_iterator()) return 0;
const absl::string_view leading_spaces = OriginalLeadingSpaces();
int delta = 0;
if (!absl::StrContains(leading_spaces, "\n")) {
Expand All @@ -228,9 +229,10 @@ std::ostream &operator<<(std::ostream &stream, const PreFormatToken &t) {
}

void ConnectPreFormatTokensPreservedSpaceStarts(
const char *buffer_start, std::vector<PreFormatToken> *format_tokens) {
absl::string_view::const_iterator buffer_start,
std::vector<PreFormatToken> *format_tokens) {
VLOG(4) << __FUNCTION__;
CHECK(buffer_start != nullptr);
CHECK(buffer_start != string_view_null_iterator());
for (auto &ftoken : *format_tokens) {
ftoken.before.preserved_space_start = buffer_start;
VLOG(4) << "space: " << VisualizeWhitespace(ftoken.OriginalLeadingSpaces());
Expand Down
15 changes: 12 additions & 3 deletions verible/common/formatting/format-token.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@

namespace verible {

// TODO: find something platform independent that is less ugly.
// Or maybe revisit the place where we need this and see if they could
// be actually represented by a const char * nullptr.
inline absl::string_view::const_iterator string_view_null_iterator() {
return absl::string_view::const_iterator{};
}

// Enumeration for options for formatting spaces between tokens.
// This controls what to explore (if not pre-determined).
// Related enum: SpacingDecision
Expand Down Expand Up @@ -76,7 +83,8 @@ struct InterTokenInfo {
// tokens, for the sake of preserving space.
// Together with the current token, they can form a string_view representing
// pre-existing space from the original buffer.
const char *preserved_space_start = nullptr;
absl::string_view::const_iterator preserved_space_start =
string_view_null_iterator();

InterTokenInfo() = default;
InterTokenInfo(const InterTokenInfo &) = default;
Expand Down Expand Up @@ -158,7 +166,7 @@ std::ostream &operator<<(std::ostream &stream, const PreFormatToken &token);
// inter-token (space) text.
// Note that this does not cover the space between the last token and EOF.
void ConnectPreFormatTokensPreservedSpaceStarts(
const char *buffer_start,
absl::string_view::const_iterator buffer_start,
std::vector<verible::PreFormatToken> *format_tokens);

// Marks formatting-disabled ranges of tokens so that their original spacing is
Expand Down Expand Up @@ -199,7 +207,8 @@ struct InterTokenDecision {
SpacingDecision action = SpacingDecision::kPreserve;

// When preserving spaces before this token, start from this offset.
const char *preserved_space_start = nullptr;
absl::string_view::const_iterator preserved_space_start =
string_view_null_iterator();

InterTokenDecision() = default;

Expand Down
7 changes: 5 additions & 2 deletions verible/common/formatting/format-token_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,12 @@ class ConnectPreFormatTokensPreservedSpaceStartsTest
public UnwrappedLineMemoryHandler {};

TEST_F(ConnectPreFormatTokensPreservedSpaceStartsTest, Empty) {
const char *text = "";
// We do want to initialize the text, otherwise string_view wraps a nulllptr
// that is checked against downstream.
// NOLINTNEXTLINE(readability-redundant-string-init)
constexpr absl::string_view text("");
CreateTokenInfosExternalStringBuffer({});
ConnectPreFormatTokensPreservedSpaceStarts(text, &pre_format_tokens_);
ConnectPreFormatTokensPreservedSpaceStarts(text.begin(), &pre_format_tokens_);
EXPECT_TRUE(pre_format_tokens_.empty());
}

Expand Down
2 changes: 1 addition & 1 deletion verible/common/formatting/layout-optimizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ LayoutFunction::const_iterator LayoutFunction::AtOrToTheLeftOf(
begin(), end(), column, [](const LayoutFunctionSegment &s, int column) {
return s.column <= column;
});
CHECK_NE(it, begin());
CHECK(it != begin());
return it - 1;
}

Expand Down
11 changes: 7 additions & 4 deletions verible/common/formatting/layout-optimizer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,8 @@ class LayoutFunctionFactoryTest : public ::testing::Test,
// Setup pointers for OriginalLeadingSpaces()
auto must_wrap_token = must_wrap_pre_format_tokens.begin();
auto joinable_token = joinable_pre_format_tokens.begin();
const char *buffer_start = sample_.data();
absl::string_view sample_view(sample_);
absl::string_view::const_iterator buffer_start = sample_view.begin();
for (size_t i = 0; i < number_of_tokens_in_set; ++i) {
must_wrap_token->before.preserved_space_start = buffer_start;
joinable_token->before.preserved_space_start = buffer_start;
Expand Down Expand Up @@ -2387,10 +2388,11 @@ class TokenPartitionsLayoutOptimizerTest : public ::testing::Test,
public UnwrappedLineMemoryHandler {
public:
TokenPartitionsLayoutOptimizerTest()
: sample_(
: sample_backing_(
// : |10 : |20 : |30 : |40
"one two three four\n"
"eleven twelve thirteen fourteen\n"),
sample_(sample_backing_),
tokens_(
absl::StrSplit(sample_, absl::ByAnyChar(" \n"), absl::SkipEmpty())),
style_(CreateStyle()),
Expand All @@ -2399,7 +2401,7 @@ class TokenPartitionsLayoutOptimizerTest : public ::testing::Test,
ftokens_.emplace_back(1, token);
}
CreateTokenInfosExternalStringBuffer(ftokens_);
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Set token properties
Expand All @@ -2423,7 +2425,8 @@ class TokenPartitionsLayoutOptimizerTest : public ::testing::Test,
}

protected:
const std::string sample_;
const std::string sample_backing_;
const absl::string_view sample_;
const std::vector<absl::string_view> tokens_;
std::vector<TokenInfo> ftokens_;
const BasicFormatStyle style_;
Expand Down
6 changes: 4 additions & 2 deletions verible/common/formatting/unwrapped-line-test-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,10 @@ class UnwrappedLineMemoryHandler {
// Points to the end of joined_token_text_ string buffer.
// Same concept as TextStructureView::EOFToken().
TokenInfo EOFToken() const {
const absl::string_view s(joined_token_text_);
return TokenInfo(verible::TK_EOF, absl::string_view(s.end(), 0));
return TokenInfo(
verible::TK_EOF,
absl::string_view( // NOLINT might be easier with c++20
joined_token_text_.data() + joined_token_text_.length(), 0));
}

protected:
Expand Down
6 changes: 4 additions & 2 deletions verible/common/strings/range.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@

namespace verible {

absl::string_view make_string_view_range(const char *begin, const char *end) {
absl::string_view make_string_view_range(
absl::string_view::const_iterator begin,
absl::string_view::const_iterator end) {
const int length = std::distance(begin, end);
CHECK_GE(length, 0) << "Malformed string bounds.";
return absl::string_view(begin, length);
return absl::string_view(&*begin, length);
}

std::pair<int, int> SubstringOffsets(absl::string_view substring,
Expand Down
6 changes: 4 additions & 2 deletions verible/common/strings/range.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ namespace verible {
// Construct a string_view from two end-points.
// string_view lacks the two-iterator constructor that (iterator) ranges and
// containers do.
// This exploits the fact that string_view's iterators are just const char*.
absl::string_view make_string_view_range(const char *begin, const char *end);
// Note, this can go with c++20 built-in string_view constructor.
absl::string_view make_string_view_range(
absl::string_view::const_iterator begin,
absl::string_view::const_iterator end);

// Returns [x,y] where superstring.substr(x, y-x) == substring.
// Precondition: substring must be a sub-range of superstring.
Expand Down
Loading

0 comments on commit ecc7bdc

Please sign in to comment.