diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index 1faff8765d33..72f02de9cc3b 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -1258,6 +1258,9 @@
strings
+
+ strings
+
strings
diff --git a/icu4c/source/common/unicode/utf16cppiter.h b/icu4c/source/common/unicode/utf16cppiter.h
index 5fb0b87dae06..582ce1d1b6de 100644
--- a/icu4c/source/common/unicode/utf16cppiter.h
+++ b/icu4c/source/common/unicode/utf16cppiter.h
@@ -39,92 +39,122 @@ enum U16IllFormedBehavior {
U16_BEHAVIOR_SURROGATE
};
-// Validating iterator over the code points in a Unicode 16-bit string.
-// TODO: all @draft ICU 76
+/**
+ * A code unit sequence for one code point returned by U16Iterator.
+ *
+ * Non-owning: stores a pointer into the iterated string plus the decoded result.
+ * @tparam Unit16 char16_t or uint16_t or (on Windows) wchar_t
+ * @draft ICU 76
+ */
+template
+class U16OneSeq {
+public:
+ U16OneSeq(const U16OneSeq &other) = default;
+
+ const Unit16 *data() const { return p; }  // const: must be callable via U16Iterator::operator*()
+ int32_t length() const { return len; }  // in code units: 0 at the limit, otherwise 1 or 2
+
+ std::basic_string_view stringView() const {
+ return std::basic_string_view(p, len);
+ }
+
+ bool isWellFormed() const { return ok; }  // false after reading an unpaired surrogate
+
+ UChar32 codePoint() const { return c; }  // U+FFFD after reading an unpaired surrogate
+
+ // TODO: std::optional maybeCodePoint() const ? (nullopt if !ok)
+
+private:
+ // TODO: Why can't we just use Unit16 here?
+ // error: declaration of 'Unit16' shadows template parameter
+ template
+ friend class U16Iterator;
+
+ U16OneSeq(const Unit16 *p) : p(p) {}  // only the friend iterator creates sequences
+
+ void fwd1() { p += len; }  // step over the sequence that was last read
+
+ void readOneForward(const Unit16 *limit) {  // decodes the code point at p into c/len/ok
+ if (p == limit) {
+ len = 0;  // nothing left to read; c and ok keep their previous values
+ return;
+ }
+ // see U16_NEXT_OR_FFFD()
+ c = *p;
+ len = 1;
+ ok = true;
+ if (U16_IS_SURROGATE(c)) {
+ uint16_t c2;  // assigned inside the condition below, only after the limit check passed
+ if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
+ c = U16_GET_SUPPLEMENTARY(c, c2);
+ len = 2;
+ } else {
+ // TODO: U16IllFormedBehavior
+ c = 0xfffd;  // unpaired surrogate: one code unit, reported as U+FFFD
+ ok = false;
+ }
+ }
+ }
+
+ const Unit16 *p;
+ UChar32 c = 0;
+ int8_t len = 0;  // 0, 1, or 2 code units
+ bool ok = false;
+};
+
+/**
+ * Validating iterator over the code points in a Unicode 16-bit string.
+ *
+ * Dereferencing yields a U16OneSeq that describes the current code point.
+ * @tparam Unit16 char16_t or uint16_t or (on Windows) wchar_t
+ * @tparam U16IllFormedBehavior TODO (not applied yet: an unpaired surrogate yields U+FFFD)
+ * @draft ICU 76
+ */
template
class U16Iterator {
public:
// TODO: make private, make friends
U16Iterator(const Unit16 *start, const Unit16 *p, const Unit16 *limit) :
- start(start), p(p), limit(limit) {
- if (p != limit) {
- readOneForward();
- }
+ start(start), limit(limit), seq(p) {  // seq holds the current position
+ seq.readOneForward(limit);  // checks p == limit itself and then sets length 0
}
// TODO: We might try to support limit==nullptr, similar to U16_ macros supporting length<0.
// Test pointers for == or != but not < or >.
U16Iterator(const U16Iterator &other) = default;
- U16Iterator(U16Iterator &&other) noexcept = default;
- bool operator==(const U16Iterator &other) const { return p == other.p; }
+ bool operator==(const U16Iterator &other) const { return seq.p == other.seq.p; }  // position only
bool operator!=(const U16Iterator &other) const { return !operator==(other); }
- UChar32 operator*() const {
- return c;
- }
-
- // TODO: good function names?
- // It would be nice to avoid a prefix like "current", "one", "cp",
- // but just length() on the iterator could be confusing.
- int32_t currentLength() const { return len; }
-
- std::basic_string_view currentView() const {
- return std::basic_string_view(p, len);
+ const U16OneSeq &operator*() const {  // refers to this iterator's member: copy it to keep it
+ return seq;  // e.g. binding a reference to *iter++ dangles once the temporary iterator dies
}
- bool currentIsWellFormed() const { return ok; }
-
U16Iterator &operator++() { // pre-increment
// TODO: think about switching directions etc.
- // Assume that readOneForward() was called and set `len`.
+ // Assume that readOneForward() was called and set seq.len.
// Skip the current code point, then read the next one.
- p += len;
- if (p != limit) {
- readOneForward();
- }
+ seq.fwd1();  // advance by the length of the sequence read last
+ seq.readOneForward(limit);  // decode the next code point, or set length 0 at the limit
return *this;
}
U16Iterator operator++(int) { // post-increment
U16Iterator result(*this);
// TODO: think about switching directions etc.
- // Assume that readOneForward() was called and set `len`.
+ // Assume that readOneForward() was called and set seq.len.
// Skip the current code point, then read the next one.
- p += len;
- if (p != limit) {
- readOneForward();
- }
+ seq.fwd1();  // advance by the length of the sequence read last
+ seq.readOneForward(limit);  // decode the next code point, or set length 0 at the limit
return result;
}
private:
- void readOneForward() {
- // see U16_NEXT_OR_FFFD()
- c = *p;
- len = 1;
- ok = true;
- if (U16_IS_SURROGATE(c)) {
- uint16_t c2;
- if (U16_IS_SURROGATE_LEAD(c) && (p + 1) != limit && U16_IS_TRAIL(c2 = p[1])) {
- c = U16_GET_SUPPLEMENTARY(c, c2);
- len = 2;
- } else {
- // TODO: U16IllFormedBehavior
- c = 0xfffd;
- ok = false;
- }
- }
- }
-
// In a validating iterator, we need start & limit so that when we read a code point
// (forward or backward) we can test if there are enough code units.
const Unit16 *start;
- const Unit16 *p;
const Unit16 *limit;
- UChar32 c = 0;
- int8_t len = 0;
- bool ok = false;
+ U16OneSeq seq;  // current position plus the code point decoded at it
};
// ------------------------------------------------------------------------- ***
diff --git a/icu4c/source/test/intltest/intltest.vcxproj b/icu4c/source/test/intltest/intltest.vcxproj
index b58b29b3d4e7..8d9bba021508 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@@ -223,6 +223,7 @@
+
diff --git a/icu4c/source/test/intltest/intltest.vcxproj.filters b/icu4c/source/test/intltest/intltest.vcxproj.filters
index d5c23d5e4cb5..0abc4608d1a6 100644
--- a/icu4c/source/test/intltest/intltest.vcxproj.filters
+++ b/icu4c/source/test/intltest/intltest.vcxproj.filters
@@ -490,6 +490,9 @@
strings
+
+ strings
+
strings
diff --git a/icu4c/source/test/intltest/utfcppitertest.cpp b/icu4c/source/test/intltest/utfcppitertest.cpp
index c0a914b579c1..f71f23327386 100644
--- a/icu4c/source/test/intltest/utfcppitertest.cpp
+++ b/icu4c/source/test/intltest/utfcppitertest.cpp
@@ -14,8 +14,9 @@
// https://en.cppreference.com/w/cpp/string/basic_string_view/operator%22%22sv
using namespace std::string_view_literals;
-using U_HEADER_ONLY_NAMESPACE::U16Iterator;
using U_HEADER_ONLY_NAMESPACE::U16_BEHAVIOR_NEGATIVE;
+using U_HEADER_ONLY_NAMESPACE::U16Iterator;
+using U_HEADER_ONLY_NAMESPACE::U16OneSeq;
class U16IteratorTest : public IntlTest {
public:
@@ -44,14 +45,17 @@ void U16IteratorTest::testExperiment() {
std::u16string_view good(u"abçカ🚴"sv);
const char16_t *goodLimit = good.data() + good.length();
U16Iterator goodIter(good.data(), good.data(), goodLimit);
- assertEquals("goodIter[0] *", u'a', *goodIter);
+ assertEquals("goodIter[0] * codePoint()", u'a', (*goodIter).codePoint());
++goodIter; // pre-increment
- assertEquals("goodIter[1] *", u'b', *goodIter);
+ assertEquals("goodIter[1] * codePoint()", u'b', (*goodIter).codePoint());
++goodIter;
- assertEquals("goodIter[2] *", u'ç', *goodIter++); // post-increment
- assertEquals("goodIter[3] *", u'カ', *goodIter);
+ assertEquals("goodIter[2] * codePoint()", u'ç', (*goodIter++).codePoint()); // post-increment
+ assertEquals("goodIter[3] * codePoint()", u'カ', (*goodIter).codePoint());
++goodIter;
- assertEquals("goodIter[4] *", U'🚴', *goodIter++);
+ const U16OneSeq &seq = *goodIter++;
+ assertEquals("goodIter[4] * codePoint()", U'🚴', seq.codePoint());
+ assertEquals("goodIter[4] * length()", 2, seq.length());
+ assertTrue("goodIter[4] * stringView()", seq.stringView() == u"🚴"sv);
U16Iterator goodEndIter(good.data(), goodLimit, goodLimit);
assertTrue("goodIter == goodEndIter", goodIter == goodEndIter);