forked from andrewprock/ustl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ustring.h
365 lines (343 loc) · 22.5 KB
/
ustring.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
// This file is part of the uSTL library, an STL implementation.
//
// Copyright (c) 2005 by Mike Sharov <[email protected]>
// This file is free software, distributed under the MIT License.
#pragma once
#include "memblock.h"
#include "utf8.h"
namespace ustl {
/// \class string ustring.h ustl.h
/// \ingroup Sequences
///
/// \brief STL basic_string<char> equivalent.
///
/// An STL container for text string manipulation.
/// Differences from C++ standard:
/// - string is a class, not a template. Wide characters are assumed to be
/// encoded with utf8 at all times except when rendering or editing,
/// where you would use a utf8 iterator.
/// - format member function - you can, of course use an \ref ostringstream,
/// which also have format functions, but most of the time this way
/// is more convenient. Because uSTL does not implement locales,
/// format is the only way to create localized strings.
/// - const char* cast operator. It is much clearer to use this than having
/// to type .c_str() every time.
/// - length returns the number of _characters_, not bytes.
/// This function is O(N), so use wisely.
///
/// An additional note is in order regarding the use of indexes. All indexes
/// passed in as arguments or returned by find are byte offsets, not character
/// offsets. Likewise, sizes are specified in bytes, not characters. The
/// rationale is that there is no way for you to know what is in the string.
/// There is no way for you to know how many characters are needed to express
/// one thing or another. The only thing you can do to a localized string is
/// search for delimiters and modify text between them as opaque blocks. If you
/// do anything else, you are hardcoding yourself into a locale! So stop it!
///
class string : public memblock {
public:
using value_type = char;
using uvalue_type = unsigned char;
using pointer = value_type*;
using const_pointer = const value_type*;
using wvalue_type = wchar_t;
using wpointer = wvalue_type*;
using const_wpointer = const wvalue_type*;
using iterator = pointer;
using const_iterator = const_pointer;
using reference = value_type&;
using const_reference = value_type;
using reverse_iterator = ::ustl::reverse_iterator<iterator>;
using const_reverse_iterator = ::ustl::reverse_iterator<const_iterator>;
using utf8_iterator = utf8in_iterator<const_iterator>;
using pos_type = size_type;
using initlist_t = std::initializer_list<value_type>;
static constexpr const pos_type npos = INT_MAX; ///< Value that means the end of string.
public:
constexpr string (void) noexcept : memblock () { relink ("",0); }
string (const string& s);
inline string (const string& s, pos_type o, size_type n = npos);
inline explicit string (const cmemlink& l);
string (const_pointer s) noexcept;
inline string (const_pointer s, size_type len);
inline string (const_pointer s1, const_pointer s2);
string (size_type n, value_type c);
inline string (string&& v) : memblock (move(v)) {}
inline string (initlist_t v) : memblock() { assign (v.begin(), v.size()); }
inline ~string (void) noexcept { }
constexpr pointer data (void) { return string::pointer (memblock::data()); }
constexpr const_pointer data (void) const { return string::const_pointer (memblock::data()); }
constexpr const_pointer c_str (void) const { return string::const_pointer (memblock::cdata()); }
constexpr size_type max_size (void) const { size_type s (memblock::max_size()); return s - !!s; }
constexpr size_type capacity (void) const { size_type c (memblock::capacity()); return c - !!c; }
void resize (size_type n);
inline void resize (size_type n, value_type c);
inline void clear (void) { resize (0); }
constexpr iterator begin (void) { return iterator (memblock::begin()); }
constexpr const_iterator begin (void) const { return const_iterator (memblock::begin()); }
constexpr const_iterator cbegin (void) const { return begin(); }
constexpr iterator end (void) { return iterator (memblock::end()); }
constexpr const_iterator end (void) const { return const_iterator (memblock::end()); }
constexpr const_iterator cend (void) const { return end(); }
constexpr reverse_iterator rbegin (void) { return reverse_iterator (end()); }
constexpr const_reverse_iterator rbegin (void) const { return const_reverse_iterator (end()); }
constexpr const_reverse_iterator crbegin (void) const { return rbegin(); }
constexpr reverse_iterator rend (void) { return reverse_iterator (begin()); }
constexpr const_reverse_iterator rend (void) const { return const_reverse_iterator (begin()); }
constexpr const_reverse_iterator crend (void) const { return rend(); }
inline utf8_iterator utf8_begin (void) const { return utf8_iterator (begin()); }
inline utf8_iterator utf8_end (void) const { return utf8_iterator (end()); }
constexpr const_reference at (pos_type pos) const { assert (pos <= size() && begin()); return begin()[pos]; }
constexpr reference at (pos_type pos) { assert (pos <= size() && begin()); return begin()[pos]; }
constexpr const_iterator iat (pos_type pos) const { return begin() + (__builtin_constant_p(pos) && pos >= npos ? size() : min(size_type(pos),size())); }
constexpr iterator iat (pos_type pos) { return const_cast<iterator>(const_cast<const string*>(this)->iat(pos)); }
const_iterator wiat (pos_type i) const noexcept;
inline iterator wiat (pos_type i) { return const_cast<iterator>(const_cast<const string*>(this)->wiat(i)); }
constexpr const_reference front (void) const { return at(0); }
constexpr reference front (void) { return at(0); }
constexpr const_reference back (void) const { return at(size()-1); }
constexpr reference back (void) { return at(size()-1); }
inline size_type length (void) const { return distance (utf8_begin(), utf8_end()); }
inline string& append (const_iterator i1, const_iterator i2) { return append (i1, distance (i1, i2)); }
string& append (const_pointer s, size_type len);
string& append (const_pointer s);
string& append (size_type n, value_type c);
inline string& append (size_type n, wvalue_type c) { insert (size(), n, c); return *this; }
inline string& append (const_wpointer s1, const_wpointer s2) { insert (size(), s1, s2); return *this; }
inline string& append (const_wpointer s) { const_wpointer se (s); for (;se&&*se;++se) {} return append (s, se); }
inline string& append (const string& s) { return append (s.begin(), s.end()); }
inline string& append (const string& s,pos_type o,size_type n) { return append (s.iat(o), s.iat(o+n)); }
inline string& append (initlist_t v) { return append (v.begin(), v.size()); }
inline void push_back (value_type c) { resize(size()+1); end()[-1] = c; }
inline void push_back (wvalue_type c) { append (1, c); }
inline void pop_back (void) { resize (size()-1); }
inline string& assign (const_iterator i1, const_iterator i2) { return assign (i1, distance (i1, i2)); }
string& assign (const_pointer s, size_type len);
string& assign (const_pointer s);
inline string& assign (const_wpointer s1, const_wpointer s2) { clear(); return append (s1, s2); }
inline string& assign (const_wpointer s1) { clear(); return append (s1); }
inline string& assign (const string& s) { return assign (s.begin(), s.end()); }
inline string& assign (const string& s, pos_type o, size_type n) { return assign (s.iat(o), s.iat(o+n)); }
inline string& assign (size_type n, value_type c) { clear(); return append (n, c); }
inline string& assign (size_type n, wvalue_type c) { clear(); return append (n, c); }
inline string& assign (string&& v) { swap (v); return *this; }
inline string& assign (initlist_t v) { return assign (v.begin(), v.size()); }
size_type copy (pointer p, size_type n, pos_type pos = 0) const noexcept;
inline size_type copyto (pointer p, size_type n, pos_type pos = 0) const noexcept { size_type bc = copy(p,n-1,pos); p[bc]=0; return bc; }
inline int compare (const string& s) const { return compare (begin(), end(), s.begin(), s.end()); }
inline int compare (pos_type start, size_type len, const string& s) const { return compare (iat(start), iat(start+len), s.begin(), s.end()); }
inline int compare (pos_type s1, size_type l1, const string& s, pos_type s2, size_type l2) const { return compare (iat(s1), iat(s1+l1), s.iat(s2), s.iat(s2+l2)); }
inline int compare (const_pointer s) const { return compare (begin(), end(), s, s + strlen(s)); }
inline int compare (pos_type s1, size_type l1, const_pointer s, size_type l2) const { return compare (iat(s1), iat(s1+l1), s, s+l2); }
inline int compare (pos_type s1, size_type l1, const_pointer s) const { return compare (s1, l1, s, strlen(s)); }
static int compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2) noexcept;
inline constexpr operator const value_type* (void) const;
inline constexpr operator value_type* (void);
inline const string& operator= (const string& s) { return assign (s.begin(), s.end()); }
inline const string& operator= (const_reference c) { return assign (&c, 1); }
inline const string& operator= (const_pointer s) { return assign (s); }
inline const string& operator= (const_wpointer s) { return assign (s); }
inline string& operator= (string&& v) { return assign (move(v)); }
inline string& operator= (initlist_t v) { return assign (v.begin(), v.size()); }
inline const string& operator+= (const string& s) { return append (s.begin(), s.size()); }
inline const string& operator+= (value_type c) { push_back(c); return *this; }
inline const string& operator+= (const_pointer s) { return append (s); }
inline const string& operator+= (wvalue_type c) { return append (1, c); }
inline const string& operator+= (uvalue_type c) { return operator+= (value_type(c)); }
inline const string& operator+= (const_wpointer s) { return append (s); }
inline string& operator+= (initlist_t v) { return append (v.begin(), v.size()); }
inline string operator+ (const string& s) const;
inline bool operator== (const string& s) const { return memblock::operator== (s); }
bool operator== (const_pointer s) const noexcept;
constexpr bool operator== (value_type c) const { return size() == 1 && c == at(0); }
inline bool operator== (uvalue_type c) const { return operator== (value_type(c)); }
inline bool operator!= (const string& s) const { return !operator== (s); }
inline bool operator!= (const_pointer s) const { return !operator== (s); }
inline bool operator!= (value_type c) const { return !operator== (c); }
inline bool operator!= (uvalue_type c) const { return !operator== (c); }
inline bool operator< (const string& s) const { return 0 > compare (s); }
inline bool operator< (const_pointer s) const { return 0 > compare (s); }
inline bool operator< (value_type c) const { return 0 > compare (begin(), end(), &c, &c + 1); }
inline bool operator< (uvalue_type c) const { return operator< (value_type(c)); }
inline bool operator> (const_pointer s) const { return 0 < compare (s); }
inline string& insert (pos_type ip, size_type n, value_type c) { insert (iat(ip), n, c); return *this; }
inline string& insert (pos_type ip, const_pointer s) { insert (iat(ip), s, s + strlen(s)); return *this; }
inline string& insert (pos_type ip, const_pointer s, size_type nlen) { insert (iat(ip), s, s + nlen); return *this; }
inline string& insert (pos_type ip, const string& s) { insert (iat(ip), s.c_str(), s.size()); return *this; }
inline string& insert (pos_type ip, const string& s, size_type sp, size_type slen) { insert (iat(ip), s.iat(sp), s.iat(sp + slen)); return *this; }
string& insert (pos_type ip, size_type n, wvalue_type c);
string& insert (pos_type ip, const_wpointer first, const_wpointer last, size_type n = 1);
inline string& insert (int ip, size_type n, value_type c) { insert (pos_type(ip), n, c); return *this; }
inline string& insert (int ip, const_pointer s, size_type nlen) { insert (pos_type(ip), s, nlen); return *this; }
iterator insert (const_iterator start, size_type n, value_type c);
inline iterator insert (const_iterator start, value_type c) { return insert (start, 1u, c); }
iterator insert (const_iterator start, const_pointer s, size_type n);
iterator insert (const_iterator start, const_pointer first, const_iterator last, size_type n = 1);
inline iterator insert (const_iterator ip, initlist_t v) { return insert (ip, v.begin(), v.end()); }
iterator erase (const_iterator epo, size_type n = 1);
string& erase (pos_type epo = 0, size_type n = npos);
inline string& erase (int epo, size_type n = npos) { return erase (pos_type(epo), n); }
inline iterator erase (const_iterator first, const_iterator last) { return erase (first, size_type(distance(first,last))); }
inline iterator eraser (pos_type first, pos_type last) { return erase (iat(first), iat(last)); }
string& replace (const_iterator first, const_iterator last, const_pointer i1, const_pointer i2, size_type n);
template <typename InputIt>
string& replace (const_iterator first, const_iterator last, InputIt first2, InputIt last2) { return replace (first, last, first2, last2, 1); }
inline string& replace (const_iterator first, const_iterator last, const string& s) { return replace (first, last, s.begin(), s.end()); }
string& replace (const_iterator first, const_iterator last, const_pointer s);
inline string& replace (const_iterator first, const_iterator last, const_pointer s, size_type slen) { return replace (first, last, s, s + slen); }
inline string& replace (const_iterator first, const_iterator last, size_type n, value_type c) { return replace (first, last, &c, &c + 1, n); }
inline string& replace (pos_type rp, size_type n, const string& s) { return replace (iat(rp), iat(rp + n), s); }
inline string& replace (pos_type rp, size_type n, const string& s, uoff_t sp, size_type slen) { return replace (iat(rp), iat(rp + n), s.iat(sp), s.iat(sp + slen)); }
inline string& replace (pos_type rp, size_type n, const_pointer s, size_type slen) { return replace (iat(rp), iat(rp + n), s, s + slen); }
inline string& replace (pos_type rp, size_type n, const_pointer s) { return replace (iat(rp), iat(rp + n), string(s)); }
inline string& replace (pos_type rp, size_type n, size_type count, value_type c) { return replace (iat(rp), iat(rp + n), count, c); }
inline string& replace (const_iterator first, const_iterator last, initlist_t v) { return replace (first, last, v.begin(), v.end()); }
inline string substr (pos_type o = 0, size_type n = npos) const { return string (*this, o, n); }
constexpr void swap (string& v) { memblock::swap (v); }
pos_type find (value_type c, pos_type pos = 0) const noexcept;
pos_type find (const string& s, pos_type pos = 0) const noexcept;
inline pos_type find (uvalue_type c, pos_type pos = 0) const noexcept { return find (value_type(c), pos); }
inline pos_type find (const_pointer p, pos_type pos, size_type count) const { string sp; sp.link (p,count); return find (sp, pos); }
pos_type rfind (value_type c, pos_type pos = npos) const noexcept;
pos_type rfind (const string& s, pos_type pos = npos) const noexcept;
inline pos_type rfind (uvalue_type c, pos_type pos = npos) const noexcept { return rfind (value_type(c), pos); }
inline pos_type rfind (const_pointer p, pos_type pos, size_type count) const { string sp; sp.link (p,count); return rfind (sp, pos); }
pos_type find_first_of (const string& s, pos_type pos = 0) const noexcept;
inline pos_type find_first_of (value_type c, pos_type pos = 0) const { string sp (1, c); return find_first_of(sp,pos); }
inline pos_type find_first_of (uvalue_type c, pos_type pos = 0) const { return find_first_of (value_type(c), pos); }
inline pos_type find_first_of (const_pointer p, pos_type pos, size_type count) const { string sp; sp.link (p,count); return find_first_of (sp, pos); }
pos_type find_first_not_of (const string& s, pos_type pos = 0) const noexcept;
inline pos_type find_first_not_of (value_type c, pos_type pos = 0) const { string sp (1, c); return find_first_not_of(sp,pos); }
inline pos_type find_first_not_of (uvalue_type c, pos_type pos = 0) const { return find_first_not_of (value_type(c), pos); }
inline pos_type find_first_not_of (const_pointer p, pos_type pos, size_type count) const { string sp; sp.link (p,count); return find_first_not_of (sp, pos); }
pos_type find_last_of (const string& s, pos_type pos = npos) const noexcept;
inline pos_type find_last_of (value_type c, pos_type pos = npos) const { string sp (1, c); return find_last_of(sp,pos); }
inline pos_type find_last_of (uvalue_type c, pos_type pos = npos) const { return find_last_of (value_type(c), pos); }
inline pos_type find_last_of (const_pointer p, pos_type pos, size_type count) const { string sp; sp.link (p,count); return find_last_of (sp, pos); }
pos_type find_last_not_of (const string& s, pos_type pos = npos) const noexcept;
inline pos_type find_last_not_of (value_type c, pos_type pos = npos) const { string sp (1, c); return find_last_not_of(sp,pos); }
inline pos_type find_last_not_of (uvalue_type c, pos_type pos = npos) const { return find_last_not_of (value_type(c), pos); }
inline pos_type find_last_not_of (const_pointer p, pos_type pos, size_type count) const { string sp; sp.link (p,count); return find_last_not_of (sp, pos); }
int vformat (const char* fmt, va_list args);
int format (const char* fmt, ...) __attribute__((__format__(__printf__, 2, 3)));
void read (istream&);
void write (ostream& os) const;
size_t stream_size (void) const noexcept;
static hashvalue_t hash (const char* f1, const char* l1) noexcept;
private:
virtual size_type minimumFreeCapacity (void) const noexcept final override __attribute__((const));
};
//----------------------------------------------------------------------
/// Assigns itself the value of string \p s
inline string::string (const cmemlink& s)
: memblock ()
{
assign (const_iterator (s.begin()), s.size());
}
/// Assigns itself a [o,o+n) substring of \p s.
inline string::string (const string& s, pos_type o, size_type n)
: memblock()
{
assign (s, o, n);
}
/// Copies the value of \p s of length \p len into itself.
inline string::string (const_pointer s, size_type len)
: memblock ()
{
assign (s, len);
}
/// Copies into itself the string data between \p s1 and \p s2
inline string::string (const_pointer s1, const_pointer s2)
: memblock ()
{
assert (s1 <= s2 && "Negative ranges result in memory allocation errors.");
assign (s1, s2);
}
/// Returns the pointer to the first character.
inline constexpr string::operator const string::value_type* (void) const
{
assert ((!end() || !*end()) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
return begin();
}
/// Returns the pointer to the first character.
inline constexpr string::operator string::value_type* (void)
{
assert ((end() && !*end()) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking.");
return begin();
}
/// Concatenates itself with \p s
inline string string::operator+ (const string& s) const
{
string result (*this);
result += s;
return result;
}
/// Resize to \p n and fill new entries with \p c
inline void string::resize (size_type n, value_type c)
{
const size_type oldn = size();
resize (n);
fill_n (iat(oldn), max(ssize_t(n-oldn),0), c);
}
//----------------------------------------------------------------------
// Operators needed to avoid comparing pointer to pointer
#define PTR_STRING_CMP(op, impl) \
inline bool op (const char* s1, const string& s2) { return impl; }
PTR_STRING_CMP (operator==, (s2 == s1))
PTR_STRING_CMP (operator!=, (s2 != s1))
PTR_STRING_CMP (operator<, (s2 > s1))
PTR_STRING_CMP (operator<=, (s2 >= s1))
PTR_STRING_CMP (operator>, (s2 < s1))
PTR_STRING_CMP (operator>=, (s2 <= s1))
#undef PTR_STRING_CMP
inline string operator+ (const char* cs, const string& ss) { string r; r.reserve (strlen(cs)+ss.size()); r += cs; r += ss; return r; }
//----------------------------------------------------------------------
inline hashvalue_t hash_value (const char* first, const char* last)
{ return string::hash (first, last); }
inline hashvalue_t hash_value (const char* v)
{ return hash_value (v, v + strlen(v)); }
//----------------------------------------------------------------------
// String-number conversions
#define STRING_TO_INT_CONVERTER(name,type,func) \
inline type name (const string& str, size_t* idx = nullptr, int base = 10) \
{ \
const char* sp = str.c_str(); \
char* endp = nullptr; \
type r = func (sp, idx ? &endp : nullptr, base);\
if (idx) \
*idx = endp - sp; \
return r; \
}
STRING_TO_INT_CONVERTER(stoi,int,strtol)
STRING_TO_INT_CONVERTER(stol,long,strtol)
STRING_TO_INT_CONVERTER(stoul,unsigned long,strtoul)
STRING_TO_INT_CONVERTER(stoll,long long,strtoll)
STRING_TO_INT_CONVERTER(stoull,unsigned long long,strtoull)
#undef STRING_TO_INT_CONVERTER
#define STRING_TO_FLOAT_CONVERTER(name,type,func) \
inline type name (const string& str, size_t* idx = nullptr) \
{ \
const char* sp = str.c_str(); \
char* endp = nullptr; \
type r = func (sp, idx ? &endp : nullptr);\
if (idx) \
*idx = endp - sp; \
return r; \
}
STRING_TO_FLOAT_CONVERTER(stof,float,strtof)
STRING_TO_FLOAT_CONVERTER(stod,double,strtod)
STRING_TO_FLOAT_CONVERTER(stold,long double,strtold)
#undef STRING_TO_FLOAT_CONVERTER
#define NUMBER_TO_STRING_CONVERTER(type,fmts)\
inline string to_string (type v) { string r(15, '\0'); r.format(fmts,v); return r; }
NUMBER_TO_STRING_CONVERTER(int,"%d")
NUMBER_TO_STRING_CONVERTER(unsigned,"%u")
NUMBER_TO_STRING_CONVERTER(long,"%ld")
NUMBER_TO_STRING_CONVERTER(unsigned long,"%lu")
NUMBER_TO_STRING_CONVERTER(long long,"%lld")
NUMBER_TO_STRING_CONVERTER(unsigned long long,"%llu")
NUMBER_TO_STRING_CONVERTER(float,"%f")
NUMBER_TO_STRING_CONVERTER(double,"%lf")
NUMBER_TO_STRING_CONVERTER(long double,"%Lf")
#undef NUMBER_TO_STRING_CONVERTER
} // namespace ustl