Skip to content

Commit

Permalink
Don't assume string_view::iterator is const char*
Browse files Browse the repository at this point in the history
There were a few assumptions in the code that these types are equivalent,
but they are not on all platforms.

The fixes sometimes involve ugly `&*some_iterator` constructs to get to
the underlying location. Some of these should be re-considered after
switching to c++20.

Issues: #2323
  • Loading branch information
hzeller committed Jan 12, 2025
1 parent 75c38da commit b22cdbe
Show file tree
Hide file tree
Showing 24 changed files with 108 additions and 105 deletions.
6 changes: 0 additions & 6 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ bazel_dep(name = "rules_bison", version = "0.3")
bazel_dep(name = "googletest", version = "1.14.0.bcr.1", dev_dependency = True)

bazel_dep(name = "abseil-cpp", version = "20240116.2")
single_version_override(
module_name = "abseil-cpp",
patch_strip = 1,
patches = ["//bazel:absl.patch"],
version = "20240116.2",
)

# Last protobuf version working with windows without strange linking errors.
# This also means that we unfortunately can't use the @protobuf//bazel rules
Expand Down
18 changes: 0 additions & 18 deletions bazel/absl.patch

This file was deleted.

4 changes: 2 additions & 2 deletions verible/common/analysis/lint-rule-status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ std::string AutoFix::Apply(absl::string_view base) const {
CHECK_GE(base.cend(), edit.fragment.cend());

const absl::string_view text_before(
prev_start, std::distance(prev_start, edit.fragment.cbegin()));
&*prev_start, std::distance(prev_start, edit.fragment.cbegin()));
absl::StrAppend(&result, text_before, edit.replacement);

prev_start = edit.fragment.cend();
}
const absl::string_view text_after(prev_start,
const absl::string_view text_after(&*prev_start,
std::distance(prev_start, base.cend()));
return absl::StrCat(result, text_after);
}
Expand Down
30 changes: 17 additions & 13 deletions verible/common/formatting/align_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class AlignmentTestFixture : public ::testing::Test,
public UnwrappedLineMemoryHandler {
public:
explicit AlignmentTestFixture(absl::string_view text)
: sample_(text),
: sample_backing_(text),
sample_(sample_backing_),
tokens_(absl::StrSplit(sample_, absl::ByAnyChar(" \n"),
absl::SkipEmpty())) {
for (const auto token : tokens_) {
Expand All @@ -80,7 +81,8 @@ class AlignmentTestFixture : public ::testing::Test,
}

protected:
const std::string sample_;
const std::string sample_backing_;
const absl::string_view sample_;
const std::vector<absl::string_view> tokens_;
std::vector<TokenInfo> ftokens_;
};
Expand Down Expand Up @@ -271,7 +273,7 @@ TEST_F(Sparse3x3MatrixAlignmentTest, AlignmentPolicyFlushLeft) {

TEST_F(Sparse3x3MatrixAlignmentTest, AlignmentPolicyPreserve) {
// Set previous-token string pointers to preserve spaces.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

TabularAlignTokens(40, sample_, ByteOffsetSet(), kPreserveAlignmentHandler,
Expand Down Expand Up @@ -391,7 +393,7 @@ TEST_F(Sparse3x3MatrixAlignmentTest, IgnoreCommentLine) {

TEST_F(Sparse3x3MatrixAlignmentTest, CompletelyDisabledNoAlignment) {
// Disabled ranges use original spacing
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand All @@ -413,7 +415,7 @@ TEST_F(Sparse3x3MatrixAlignmentTest, CompletelyDisabledNoAlignment) {

TEST_F(Sparse3x3MatrixAlignmentTest, CompletelyDisabledNoAlignmentWithIndent) {
// Disabled ranges use original spacing
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -445,7 +447,7 @@ class Sparse3x3MatrixAlignmentMoreSpacesTest
Sparse3x3MatrixAlignmentMoreSpacesTest()
: Sparse3x3MatrixAlignmentTest("one two\nthree four\nfive six") {
// This is needed for preservation of original spacing.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);
}
};
Expand Down Expand Up @@ -480,7 +482,7 @@ TEST_F(Sparse3x3MatrixAlignmentMoreSpacesTest,

TEST_F(Sparse3x3MatrixAlignmentTest, PartiallyDisabledNoAlignment) {
// Disabled ranges use original spacing
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -865,7 +867,7 @@ class Dense2x2MatrixAlignmentTest : public MatrixTreeAlignmentTestFixture {
CHECK_EQ(tokens_.size(), 4);

// Need to know original spacing to be able to infer user-intent.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -1170,7 +1172,7 @@ TEST_F(SubcolumnsTreeAlignmentTest, AlignmentPolicyFlushLeft) {
}

TEST_F(SubcolumnsTreeAlignmentTest, AlignmentPolicyPreserve) {
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

TabularAlignTokens(40, sample_, ByteOffsetSet(),
Expand Down Expand Up @@ -1332,7 +1334,7 @@ class InferSubcolumnsTreeAlignmentTest : public SubcolumnsTreeAlignmentTest {
"( eleven nineteen-ninety-nine 2k )\n")
: SubcolumnsTreeAlignmentTest(text) {
// Need to know original spacing to be able to infer user-intent.
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);

// Require 1 space between tokens.
Expand Down Expand Up @@ -1497,15 +1499,16 @@ class FormatUsingOriginalSpacingTest : public ::testing::Test,
"\n <1NL+7Spaces>\n <1nl+7spaces>"
"\n\n <2NL+2Spaces>\n\n <2nl+2spaces>"
"\n \n\n <1NL+1Space+2NL+2Spaces>\n \n\n <1nl+1space+2nl+2spaces>")
: sample_(text),
: sample_backing_(text),
sample_(sample_backing_),
tokens_(absl::StrSplit(sample_, OutsideCharPairs('<', '>'),
absl::SkipEmpty())) {
for (const auto token : tokens_) {
ftokens_.emplace_back(1, token);
}
// sample_ is the memory-owning string buffer
CreateTokenInfosExternalStringBuffer(ftokens_);
ConnectPreFormatTokensPreservedSpaceStarts(sample_.data(),
ConnectPreFormatTokensPreservedSpaceStarts(sample_.begin(),
&pre_format_tokens_);
}

Expand All @@ -1518,7 +1521,8 @@ class FormatUsingOriginalSpacingTest : public ::testing::Test,
EXPECT_PRED_FORMAT2(TokenPartitionTreesEqualPredFormat, nodes[0], expected);
}

const std::string sample_;
const std::string sample_backing_;
const absl::string_view sample_;
const std::vector<absl::string_view> tokens_;
std::vector<TokenInfo> ftokens_;
};
Expand Down
14 changes: 8 additions & 6 deletions verible/common/formatting/format-token.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,10 @@ InterTokenDecision::InterTokenDecision(const InterTokenInfo &info)
action(ConvertSpacing(info.break_decision)),
preserved_space_start(info.preserved_space_start) {}

static absl::string_view OriginalLeadingSpacesRange(const char *begin,
const char *end) {
if (begin == nullptr) {
static absl::string_view OriginalLeadingSpacesRange(
absl::string_view::const_iterator begin,
absl::string_view::const_iterator end) {
if (begin == string_view_null_iterator()) {
VLOG(4) << "no original space range";
return make_string_view_range(end, end); // empty range
}
Expand All @@ -163,7 +164,7 @@ absl::string_view FormattedToken::OriginalLeadingSpaces() const {
std::ostream &FormattedToken::FormattedText(std::ostream &stream) const {
switch (before.action) {
case SpacingDecision::kPreserve: {
if (before.preserved_space_start != nullptr) {
if (before.preserved_space_start != string_view_null_iterator()) {
// Calculate string_view range of pre-existing spaces, and print that.
stream << OriginalLeadingSpaces();
} else {
Expand Down Expand Up @@ -228,11 +229,12 @@ std::ostream &operator<<(std::ostream &stream, const PreFormatToken &t) {
}

void ConnectPreFormatTokensPreservedSpaceStarts(
const char *buffer_start, std::vector<PreFormatToken> *format_tokens) {
absl::string_view::const_iterator buffer_start,
std::vector<PreFormatToken> *format_tokens) {
VLOG(4) << __FUNCTION__;
CHECK(buffer_start != nullptr);
for (auto &ftoken : *format_tokens) {
ftoken.before.preserved_space_start = buffer_start;
ftoken.before.preserved_space_start = &*buffer_start;
VLOG(4) << "space: " << VisualizeWhitespace(ftoken.OriginalLeadingSpaces());
buffer_start = ftoken.Text().end();
}
Expand Down
12 changes: 9 additions & 3 deletions verible/common/formatting/format-token.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@

namespace verible {

// TODO: find something platform independent that is less ugly.
inline absl::string_view::iterator string_view_null_iterator() {
return absl::string_view().begin();
}
// Enumeration for options for formatting spaces between tokens.
// This controls what to explore (if not pre-determined).
// Related enum: SpacingDecision
Expand Down Expand Up @@ -76,7 +80,8 @@ struct InterTokenInfo {
// tokens, for the sake of preserving space.
// Together with the current token, they can form a string_view representing
// pre-existing space from the original buffer.
const char *preserved_space_start = nullptr;
absl::string_view::iterator preserved_space_start =
string_view_null_iterator();

InterTokenInfo() = default;
InterTokenInfo(const InterTokenInfo &) = default;
Expand Down Expand Up @@ -158,7 +163,7 @@ std::ostream &operator<<(std::ostream &stream, const PreFormatToken &token);
// inter-token (space) text.
// Note that this does not cover the space between the last token and EOF.
void ConnectPreFormatTokensPreservedSpaceStarts(
const char *buffer_start,
absl::string_view::const_iterator buffer_start,
std::vector<verible::PreFormatToken> *format_tokens);

// Marks formatting-disabled ranges of tokens so that their original spacing is
Expand Down Expand Up @@ -199,7 +204,8 @@ struct InterTokenDecision {
SpacingDecision action = SpacingDecision::kPreserve;

// When preserving spaces before this token, start from this offset.
const char *preserved_space_start = nullptr;
absl::string_view::iterator preserved_space_start =
string_view_null_iterator();

InterTokenDecision() = default;

Expand Down
3 changes: 2 additions & 1 deletion verible/common/formatting/layout-optimizer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -701,7 +701,8 @@ class LayoutFunctionFactoryTest : public ::testing::Test,
// Setup pointers for OriginalLeadingSpaces()
auto must_wrap_token = must_wrap_pre_format_tokens.begin();
auto joinable_token = joinable_pre_format_tokens.begin();
const char *buffer_start = sample_.data();
absl::string_view sample_view(sample_);
absl::string_view::const_iterator buffer_start = sample_view.begin();
for (size_t i = 0; i < number_of_tokens_in_set; ++i) {
must_wrap_token->before.preserved_space_start = buffer_start;
joinable_token->before.preserved_space_start = buffer_start;
Expand Down
6 changes: 4 additions & 2 deletions verible/common/formatting/unwrapped-line-test-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,10 @@ class UnwrappedLineMemoryHandler {
// Points to the end of joined_token_text_ string buffer.
// Same concept as TextStructureView::EOFToken().
TokenInfo EOFToken() const {
const absl::string_view s(joined_token_text_);
return TokenInfo(verible::TK_EOF, absl::string_view(s.end(), 0));
return TokenInfo(
verible::TK_EOF,
absl::string_view( // NOLINT might be easier with c++20
joined_token_text_.data() + joined_token_text_.length(), 0));
}

protected:
Expand Down
6 changes: 4 additions & 2 deletions verible/common/strings/range.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@

namespace verible {

absl::string_view make_string_view_range(const char *begin, const char *end) {
absl::string_view make_string_view_range(
absl::string_view::const_iterator begin,
absl::string_view::const_iterator end) {
const int length = std::distance(begin, end);
CHECK_GE(length, 0) << "Malformed string bounds.";
return absl::string_view(begin, length);
return absl::string_view(&*begin, length);
}

std::pair<int, int> SubstringOffsets(absl::string_view substring,
Expand Down
7 changes: 5 additions & 2 deletions verible/common/strings/range.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ namespace verible {
// Construct a string_view from two end-points.
// string_view lacks the two-iterator constructor that (iterator) ranges and
// containers do.
// This exploits the fact that string_view's iterators are just const char*.
absl::string_view make_string_view_range(const char *begin, const char *end);
// Note, this can go with c++20, as that provides a std::string_view
// constructor for this.
absl::string_view make_string_view_range(
absl::string_view::const_iterator begin,
absl::string_view::const_iterator end);

// Returns [x,y] where superstring.substr(x, y-x) == substring.
// Precondition: substring must be a sub-range of superstring.
Expand Down
9 changes: 5 additions & 4 deletions verible/common/strings/rebase_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,12 @@ TEST(RebaseStringViewTest, NewSubstringNotAtFront) {
// Test that substring in the middle of old string is rebased correctly.
TEST(RebaseStringViewTest, UsingCharPointer) {
const absl::string_view text = "hello";
const absl::string_view new_base = "xxxhelloyyy";
const char *new_view = new_base.begin() + 3;
const char *new_base = "xxxhelloyyy";
const char *new_view_offset = new_base + 3;
absl::string_view text_view(text);
RebaseStringView(&text_view, new_view); // assume original length
EXPECT_TRUE(BoundsEqual(text_view, new_base.substr(3, 5)));
RebaseStringView(&text_view, new_view_offset); // assume original length
const absl::string_view new_base_view(new_base);
EXPECT_TRUE(BoundsEqual(text_view, new_base_view.substr(3, 5)));
}

// Test integration with substr() function rebases correctly.
Expand Down
26 changes: 15 additions & 11 deletions verible/common/text/text-structure.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ void TextStructureView::Clear() {
contents_ = contents_.substr(0, 0); // clear
}

static bool TokenLocationLess(const TokenInfo &token, const char *offset) {
static bool TokenLocationLess(const TokenInfo &token,
const absl::string_view::const_iterator offset) {
return token.text().begin() < offset;
}

Expand Down Expand Up @@ -408,8 +409,9 @@ absl::Status TextStructureView::SyntaxTreeConsistencyCheck() const {
VLOG(2) << __FUNCTION__;
// Check that first and last token in syntax_tree_ point to text
// inside contents_.
const char *const lower_bound = contents_.data();
const char *const upper_bound = lower_bound + contents_.length();
const absl::string_view::const_iterator lower_bound = contents_.begin();
const absl::string_view::const_iterator upper_bound =
lower_bound + contents_.length();
if (syntax_tree_ != nullptr) {
const SyntaxTreeLeaf *left = GetLeftmostLeaf(*syntax_tree_);
if (!left) return absl::OkStatus();
Expand Down Expand Up @@ -470,19 +472,21 @@ void TextStructureView::ConsumeDeferredExpansion(
TokenSequence::const_iterator *next_token_iter,
TokenStreamView::const_iterator *next_token_view_iter,
DeferredExpansion *expansion, TokenSequence *combined_tokens,
std::vector<int> *token_view_indices, const char *offset) {
std::vector<int> *token_view_indices,
absl::string_view::const_iterator offset) {
auto token_iter = *next_token_iter;
auto token_view_iter = *next_token_view_iter;
// Find the position up to each expansion point.
*next_token_iter =
std::lower_bound(token_iter, tokens_.cend(), offset,
[](const TokenInfo &token, const char *target) {
return std::distance(target, token.text().begin()) < 0;
});
*next_token_iter = std::lower_bound(
token_iter, tokens_.cend(), offset,
[](const TokenInfo &token, absl::string_view::const_iterator target) {
return std::distance(target, token.text().begin()) < 0;
});
CHECK(*next_token_iter != tokens_.cend());
*next_token_view_iter = std::lower_bound(
token_view_iter, tokens_view_.cend(), offset,
[](TokenStreamView::const_reference token_ref, const char *target) {
[](TokenStreamView::const_reference token_ref,
absl::string_view::const_iterator target) {
return std::distance(target, token_ref->text().begin()) < 0;
});
CHECK(*next_token_view_iter != tokens_view_.cend());
Expand All @@ -498,7 +502,7 @@ void TextStructureView::ConsumeDeferredExpansion(
TextStructureView &sub_data = ABSL_DIE_IF_NULL(subanalysis)->MutableData();
const absl::string_view sub_data_text(sub_data.Contents());
CHECK(!IsSubRange(sub_data_text, contents_));
CHECK_EQ(sub_data_text, absl::string_view(offset, sub_data_text.length()));
CHECK_EQ(sub_data_text, absl::string_view(&*offset, sub_data_text.length()));
CHECK_GE(offset, contents_.begin());
sub_data.RebaseTokensToSuperstring(contents_, sub_data_text,
std::distance(contents_.begin(), offset));
Expand Down
3 changes: 2 additions & 1 deletion verible/common/text/text-structure.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ class TextStructureView {
TokenSequence::const_iterator *next_token_iter,
TokenStreamView::const_iterator *next_token_view_iter,
DeferredExpansion *expansion, TokenSequence *combined_tokens,
std::vector<int> *token_view_indices, const char *offset);
std::vector<int> *token_view_indices,
absl::string_view::const_iterator offset);

// Resets all fields. Only needed in tests.
void Clear();
Expand Down
2 changes: 1 addition & 1 deletion verible/common/text/token-info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ TokenInfo TokenInfo::EOFToken() {
}

TokenInfo TokenInfo::EOFToken(absl::string_view buffer) {
return {TK_EOF, absl::string_view(buffer.end(), 0)};
return {TK_EOF, absl::string_view(buffer.data() + buffer.length(), 0)};
}

bool TokenInfo::operator==(const TokenInfo &token) const {
Expand Down
Loading

0 comments on commit b22cdbe

Please sign in to comment.