From 500274ba945ba7c4edd6457f690d7ee2748bf2fc Mon Sep 17 00:00:00 2001 From: CharlesMengCA <58993776+CharlesMengCA@users.noreply.github.com> Date: Mon, 17 Jan 2022 01:24:50 -0500 Subject: [PATCH] Add files via upload --- rapidjson/document.h | 2 +- rapidjson/internal/biginteger.h | 19 +- rapidjson/internal/diyfp.h | 4 + rapidjson/internal/dtoa.h | 10 +- rapidjson/internal/strfunc.h | 14 + rapidjson/internal/strtod.h | 15 +- rapidjson/pointer.h | 67 +++++ rapidjson/reader.h | 38 +-- rapidjson/schema.h | 345 ++++++++++++++++------- rapidjson/uri.h | 481 ++++++++++++++++++++++++++++++++ 10 files changed, 862 insertions(+), 133 deletions(-) create mode 100644 rapidjson/uri.h diff --git a/rapidjson/document.h b/rapidjson/document.h index 057b37a..e2cc600 100644 --- a/rapidjson/document.h +++ b/rapidjson/document.h @@ -1951,7 +1951,7 @@ class GenericValue { case kArrayType: if (RAPIDJSON_UNLIKELY(!handler.StartArray())) return false; - for (const GenericValue* v = Begin(); v != End(); ++v) + for (ConstValueIterator v = Begin(); v != End(); ++v) if (RAPIDJSON_UNLIKELY(!v->Accept(handler))) return false; return handler.EndArray(data_.a.size); diff --git a/rapidjson/internal/biginteger.h b/rapidjson/internal/biginteger.h index 1245578..af48738 100644 --- a/rapidjson/internal/biginteger.h +++ b/rapidjson/internal/biginteger.h @@ -19,7 +19,11 @@ #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_M_AMD64) #include // for _umul128 +#if !defined(_ARM64EC_) #pragma intrinsic(_umul128) +#else +#pragma comment(lib,"softintrin") +#endif #endif RAPIDJSON_NAMESPACE_BEGIN @@ -37,7 +41,8 @@ class BigInteger { digits_[0] = u; } - BigInteger(const char* decimals, size_t length) : count_(1) { + template + BigInteger(const Ch* decimals, size_t length) : count_(1) { RAPIDJSON_ASSERT(length > 0); digits_[0] = 0; size_t i = 0; @@ -221,7 +226,8 @@ class BigInteger { bool IsZero() const { return count_ == 1 && digits_[0] == 0; } private: - void AppendDecimal64(const char* begin, const char* end) { + template + void AppendDecimal64(const Ch* begin, const Ch* end) { uint64_t u = ParseUint64(begin, end); if (IsZero()) *this = u; @@ -236,11 +242,12 @@ class BigInteger { digits_[count_++] = digit; } - static uint64_t ParseUint64(const char* begin, const char* end) { + template + static uint64_t ParseUint64(const Ch* begin, const Ch* end) { uint64_t r = 0; - for (const char* p = begin; p != end; ++p) { - RAPIDJSON_ASSERT(*p >= '0' && *p <= '9'); - r = r * 10u + static_cast(*p - '0'); + for (const Ch* p = begin; p != end; ++p) { + RAPIDJSON_ASSERT(*p >= Ch('0') && *p <= Ch('9')); + r = r * 10u + static_cast(*p - Ch('0')); } return r; } diff --git a/rapidjson/internal/diyfp.h b/rapidjson/internal/diyfp.h index a40797e..f7d4653 100644 --- a/rapidjson/internal/diyfp.h +++ b/rapidjson/internal/diyfp.h @@ -25,7 +25,11 @@ #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER) #include +#if !defined(_ARM64EC_) #pragma intrinsic(_umul128) +#else +#pragma comment(lib,"softintrin") +#endif #endif RAPIDJSON_NAMESPACE_BEGIN diff --git a/rapidjson/internal/dtoa.h b/rapidjson/internal/dtoa.h index 621402f..9f6ae3b 100644 --- a/rapidjson/internal/dtoa.h +++ b/rapidjson/internal/dtoa.h @@ -58,7 +58,11 @@ inline int CountDecimalDigit32(uint32_t n) { } inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) { - static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; + static const uint64_t kPow10[] = { 1U, 10U, 100U, 1000U, 10000U, 100000U, 1000000U, 10000000U, 100000000U, + 1000000000U, 10000000000U, 100000000000U, 1000000000000U, + 10000000000000U, 100000000000000U, 1000000000000000U, + 10000000000000000U, 100000000000000000U, 1000000000000000000U, + 10000000000000000000U }; const DiyFp one(uint64_t(1) << -Mp.e, Mp.e); const DiyFp wp_w = Mp - W; uint32_t p1 = static_cast(Mp.f >> -one.e); @@ -86,7 +90,7 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff uint64_t tmp = (static_cast(p1) << -one.e) + p2; if (tmp <= delta) { *K += kappa; - GrisuRound(buffer, *len, delta, tmp, static_cast(kPow10[kappa]) << -one.e, wp_w.f); + GrisuRound(buffer, *len, delta, tmp, kPow10[kappa] << -one.e, wp_w.f); return; } } @@ -103,7 +107,7 @@ inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buff if (p2 < delta) { *K += kappa; int index = -kappa; - GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[index] : 0)); + GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 20 ? kPow10[index] : 0)); return; } } diff --git a/rapidjson/internal/strfunc.h b/rapidjson/internal/strfunc.h index baecb6c..b698a8f 100644 --- a/rapidjson/internal/strfunc.h +++ b/rapidjson/internal/strfunc.h @@ -45,6 +45,20 @@ inline SizeType StrLen(const wchar_t* s) { return SizeType(std::wcslen(s)); } +//! Custom strcmpn() which works on different character types. +/*! \tparam Ch Character type (e.g. char, wchar_t, short) + \param s1 Null-terminated input string. + \param s2 Null-terminated input string. + \return 0 if equal +*/ +template +inline int StrCmp(const Ch* s1, const Ch* s2) { + RAPIDJSON_ASSERT(s1 != 0); + RAPIDJSON_ASSERT(s2 != 0); + while(*s1 && (*s1 == *s2)) { s1++; s2++; } + return static_cast(*s1) < static_cast(*s2) ? -1 : static_cast(*s1) > static_cast(*s2); +} + //! Returns number of code points in a encoded string. template bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) { diff --git a/rapidjson/internal/strtod.h b/rapidjson/internal/strtod.h index d61a67a..55f0e38 100644 --- a/rapidjson/internal/strtod.h +++ b/rapidjson/internal/strtod.h @@ -128,17 +128,18 @@ inline bool StrtodFast(double d, int p, double* result) { } // Compute an approximation and see if it is within 1/2 ULP -inline bool StrtodDiyFp(const char* decimals, int dLen, int dExp, double* result) { +template +inline bool StrtodDiyFp(const Ch* decimals, int dLen, int dExp, double* result) { uint64_t significand = 0; int i = 0; // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999 for (; i < dLen; i++) { if (significand > RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || - (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > '5')) + (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > Ch('5'))) break; - significand = significand * 10u + static_cast(decimals[i] - '0'); + significand = significand * 10u + static_cast(decimals[i] - Ch('0')); } - if (i < dLen && decimals[i] >= '5') // Rounding + if (i < dLen && decimals[i] >= Ch('5')) // Rounding significand++; int remaining = dLen - i; @@ -205,7 +206,8 @@ inline bool StrtodDiyFp(const char* decimals, int dLen, int dExp, double* result return halfWay - static_cast(error) >= precisionBits || precisionBits >= halfWay + static_cast(error); } -inline double StrtodBigInteger(double approx, const char* decimals, int dLen, int dExp) { +template +inline double StrtodBigInteger(double approx, const Ch* decimals, int dLen, int dExp) { RAPIDJSON_ASSERT(dLen >= 0); const BigInteger dInt(decimals, static_cast(dLen)); Double a(approx); @@ -223,7 +225,8 @@ inline double StrtodBigInteger(double approx, const char* decimals, int dLen, in return a.NextPositiveDouble(); } -inline double StrtodFullPrecision(double d, int p, const char* decimals, size_t length, size_t decimalPosition, int exp) { +template +inline double StrtodFullPrecision(double d, int p, const Ch* decimals, size_t length, size_t decimalPosition, int exp) { RAPIDJSON_ASSERT(d >= 0.0); RAPIDJSON_ASSERT(length >= 1); diff --git a/rapidjson/pointer.h b/rapidjson/pointer.h index 9621442..67a9cb0 100644 --- a/rapidjson/pointer.h +++ b/rapidjson/pointer.h @@ -16,6 +16,7 @@ #define RAPIDJSON_POINTER_H_ #include "document.h" +#include "uri.h" #include "internal/itoa.h" #ifdef __clang__ @@ -80,6 +81,8 @@ class GenericPointer { public: typedef typename ValueType::EncodingType EncodingType; //!< Encoding type from Value typedef typename ValueType::Ch Ch; //!< Character type from Value + typedef GenericUri UriType; + //! A token is the basic units of internal representation. /*! @@ -520,6 +523,70 @@ class GenericPointer { //@} + //!@name Compute URI + //@{ + + //! Compute the in-scope URI for a subtree. + // For use with JSON pointers into JSON schema documents. + /*! + \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root. + \param rootUri Root URI + \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token. + \param allocator Allocator for Uris + \return Uri if it can be resolved. Otherwise null. + + \note + There are only 3 situations when a URI cannot be resolved: + 1. A value in the path is not an array nor object. + 2. An object value does not contain the token. + 3. A token is out of range of an array value. + + Use unresolvedTokenIndex to retrieve the token index. + */ + UriType GetUri(ValueType& root, const UriType& rootUri, size_t* unresolvedTokenIndex = 0, Allocator* allocator = 0) const { + static const Ch kIdString[] = { 'i', 'd', '\0' }; + static const ValueType kIdValue(kIdString, 2); + UriType base = UriType(rootUri, allocator); + RAPIDJSON_ASSERT(IsValid()); + ValueType* v = &root; + for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) { + switch (v->GetType()) { + case kObjectType: + { + // See if we have an id, and if so resolve with the current base + typename ValueType::MemberIterator m = v->FindMember(kIdValue); + if (m != v->MemberEnd() && (m->value).IsString()) { + UriType here = UriType(m->value, allocator).Resolve(base, allocator); + base = here; + } + m = v->FindMember(GenericValue(GenericStringRef(t->name, t->length))); + if (m == v->MemberEnd()) + break; + v = &m->value; + } + continue; + case kArrayType: + if (t->index == kPointerInvalidIndex || t->index >= v->Size()) + break; + v = &((*v)[t->index]); + continue; + default: + break; + } + + // Error: unresolved token + if (unresolvedTokenIndex) + *unresolvedTokenIndex = static_cast(t - tokens_); + return UriType(allocator); + } + return base; + } + + UriType GetUri(const ValueType& root, const UriType& rootUri, size_t* unresolvedTokenIndex = 0, Allocator* allocator = 0) const { + return GetUri(const_cast(root), rootUri, unresolvedTokenIndex, allocator); + } + + //!@name Query value //@{ diff --git a/rapidjson/reader.h b/rapidjson/reader.h index 09ace4e..b37afff 100644 --- a/rapidjson/reader.h +++ b/rapidjson/reader.h @@ -1404,11 +1404,11 @@ class GenericReader { } #endif // RAPIDJSON_NEON - template + template class NumberStream; - template - class NumberStream { + template + class NumberStream { public: typedef typename InputStream::Ch Ch; @@ -1421,7 +1421,7 @@ class GenericReader { size_t Tell() { return is.Tell(); } size_t Length() { return 0; } - const char* Pop() { return 0; } + const StackCharacter* Pop() { return 0; } protected: NumberStream& operator=(const NumberStream&); @@ -1429,35 +1429,35 @@ class GenericReader { InputStream& is; }; - template - class NumberStream : public NumberStream { - typedef NumberStream Base; + template + class NumberStream : public NumberStream { + typedef NumberStream Base; public: NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {} RAPIDJSON_FORCEINLINE Ch TakePush() { - stackStream.Put(static_cast(Base::is.Peek())); + stackStream.Put(static_cast(Base::is.Peek())); return Base::is.Take(); } - RAPIDJSON_FORCEINLINE void Push(char c) { + RAPIDJSON_FORCEINLINE void Push(StackCharacter c) { stackStream.Put(c); } size_t Length() { return stackStream.Length(); } - const char* Pop() { + const StackCharacter* Pop() { stackStream.Put('\0'); return stackStream.Pop(); } private: - StackStream stackStream; + StackStream stackStream; }; - template - class NumberStream : public NumberStream { - typedef NumberStream Base; + template + class NumberStream : public NumberStream { + typedef NumberStream Base; public: NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {} @@ -1466,8 +1466,10 @@ class GenericReader { template void ParseNumber(InputStream& is, Handler& handler) { + typedef typename internal::SelectIf, typename TargetEncoding::Ch, char>::Type NumberCharacter; + internal::StreamLocalCopy copy(is); - NumberStream(s.Length()); - StringStream srcStream(s.Pop()); + GenericStringStream > srcStream(s.Pop()); StackStream dstStream(stack_); while (numCharsToCopy--) { - Transcoder, TargetEncoding>::Transcode(srcStream, dstStream); + Transcoder, TargetEncoding>::Transcode(srcStream, dstStream); } dstStream.Put('\0'); const typename TargetEncoding::Ch* str = dstStream.Pop(); @@ -1705,7 +1707,7 @@ class GenericReader { } else { size_t length = s.Length(); - const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. + const NumberCharacter* decimal = s.Pop(); // Pop stack no matter if it will be used or not. if (useDouble) { int p = exp + expFrac; diff --git a/rapidjson/schema.h b/rapidjson/schema.h index a034021..f0759ff 100644 --- a/rapidjson/schema.h +++ b/rapidjson/schema.h @@ -19,6 +19,7 @@ #include "pointer.h" #include "stringbuffer.h" #include "error/en.h" +#include "uri.h" #include // abs, floor #if !defined(RAPIDJSON_SCHEMA_USE_INTERNALREGEX) @@ -432,11 +433,13 @@ class Schema { typedef Schema SchemaType; typedef GenericValue SValue; typedef IValidationErrorHandler ErrorHandler; + typedef GenericUri UriType; friend class GenericSchemaDocument; - Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator) : + Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator, const UriType& id = UriType()) : allocator_(allocator), uri_(schemaDocument->GetURI(), *allocator), + id_(id), pointer_(p, allocator), typeless_(schemaDocument->GetTypeless()), enum_(), @@ -474,9 +477,28 @@ class Schema { typedef typename ValueType::ConstValueIterator ConstValueIterator; typedef typename ValueType::ConstMemberIterator ConstMemberIterator; + // PR #1393 + // Early add this Schema and its $ref(s) in schemaDocument's map to avoid infinite + // recursion (with recursive schemas), since schemaDocument->getSchema() is always + // checked before creating a new one. Don't cache typeless_, though. + if (this != typeless_) { + typedef typename SchemaDocumentType::SchemaEntry SchemaEntry; + SchemaEntry *entry = schemaDocument->schemaMap_.template Push(); + new (entry) SchemaEntry(pointer_, this, true, allocator_); + schemaDocument->AddSchemaRefs(this); + } + if (!value.IsObject()) return; + // If we have an id property, resolve it with the in-scope id + if (const ValueType* v = GetMember(value, GetIdString())) { + if (v->IsString()) { + UriType local(*v, allocator); + id_ = local.Resolve(id_, allocator); + } + } + if (const ValueType* v = GetMember(value, GetTypeString())) { type_ = 0; if (v->IsString()) @@ -506,9 +528,9 @@ class Schema { AssignIfExist(oneOf_, *schemaDocument, p, value, GetOneOfString(), document); if (const ValueType* v = GetMember(value, GetNotString())) { - schemaDocument->CreateSchema(¬_, p.Append(GetNotString(), allocator_), *v, document); - notValidatorIndex_ = validatorCount_; - validatorCount_++; + schemaDocument->CreateSchema(¬_, p.Append(GetNotString(), allocator_), *v, document, id_); + notValidatorIndex_ = validatorCount_; + validatorCount_++; } } @@ -524,7 +546,7 @@ class Schema { if (properties && properties->IsObject()) for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) AddUniqueElement(allProperties, itr->name); - + if (required && required->IsArray()) for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr) if (itr->IsString()) @@ -555,7 +577,7 @@ class Schema { for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) { SizeType index; if (FindPropertyIndex(itr->name, &index)) - schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document); + schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document, id_); } } @@ -567,7 +589,7 @@ class Schema { for (ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr) { new (&patternProperties_[patternPropertyCount_]) PatternProperty(); patternProperties_[patternPropertyCount_].pattern = CreatePattern(itr->name); - schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, q.Append(itr->name, allocator_), itr->value, document); + schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, q.Append(itr->name, allocator_), itr->value, document, id_); patternPropertyCount_++; } } @@ -599,7 +621,7 @@ class Schema { } else if (itr->value.IsObject()) { hasSchemaDependencies_ = true; - schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document); + schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document, id_); properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_; validatorCount_++; } @@ -611,7 +633,7 @@ class Schema { if (v->IsBool()) additionalProperties_ = v->GetBool(); else if (v->IsObject()) - schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document); + schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document, id_); } AssignIfExist(minProperties_, value, GetMinPropertiesString()); @@ -621,12 +643,12 @@ class Schema { if (const ValueType* v = GetMember(value, GetItemsString())) { PointerType q = p.Append(GetItemsString(), allocator_); if (v->IsObject()) // List validation - schemaDocument->CreateSchema(&itemsList_, q, *v, document); + schemaDocument->CreateSchema(&itemsList_, q, *v, document, id_); else if (v->IsArray()) { // Tuple validation itemsTuple_ = static_cast(allocator_->Malloc(sizeof(const Schema*) * v->Size())); SizeType index = 0; for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++) - schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document); + schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document, id_); } } @@ -637,7 +659,7 @@ class Schema { if (v->IsBool()) additionalItems_ = v->GetBool(); else if (v->IsObject()) - schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document); + schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document, id_); } AssignIfExist(uniqueItems_, value, GetUniqueItemsString()); @@ -697,6 +719,10 @@ class Schema { return uri_; } + const UriType& GetId() const { + return id_; + } + const PointerType& GetPointer() const { return pointer_; } @@ -776,7 +802,7 @@ class Schema { foundEnum:; } - // Only check allOf etc if we have validators + // Only check allOf etc if we have validators if (context.validatorCount > 0) { if (allOf_.schemas) for (SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++) @@ -826,7 +852,7 @@ class Schema { } return CreateParallelValidator(context); } - + bool Bool(Context& context, bool) const { if (!(type_ & (1 << kBooleanSchemaType))) { DisallowedType(context, GetBooleanString()); @@ -870,13 +896,13 @@ class Schema { if (!maximum_.IsNull() && !CheckDoubleMaximum(context, d)) return false; - + if (!multipleOf_.IsNull() && !CheckDoubleMultipleOf(context, d)) return false; - + return CreateParallelValidator(context); } - + bool String(Context& context, const Ch* str, SizeType length, bool) const { if (!(type_ & (1 << kStringSchemaType))) { DisallowedType(context, GetStringString()); @@ -925,7 +951,7 @@ class Schema { return CreateParallelValidator(context); } - + bool Key(Context& context, const Ch* str, SizeType len, bool) const { if (patternProperties_) { context.patternPropertiesSchemaCount = 0; @@ -1018,7 +1044,7 @@ class Schema { } } if (context.error_handler.EndDependencyErrors()) - RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorDependencies); + RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorDependencies); } return true; @@ -1038,12 +1064,12 @@ class Schema { bool EndArray(Context& context, SizeType elementCount) const { context.inArray = false; - + if (elementCount < minItems_) { context.error_handler.TooFewItems(elementCount, minItems_); RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMinItems); } - + if (elementCount > maxItems_) { context.error_handler.TooManyItems(elementCount, maxItems_); RAPIDJSON_INVALID_KEYWORD_RETURN(kValidateErrorMaxItems); @@ -1132,6 +1158,15 @@ class Schema { RAPIDJSON_STRING_(ExclusiveMaximum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'a', 'x', 'i', 'm', 'u', 'm') RAPIDJSON_STRING_(MultipleOf, 'm', 'u', 'l', 't', 'i', 'p', 'l', 'e', 'O', 'f') RAPIDJSON_STRING_(DefaultValue, 'd', 'e', 'f', 'a', 'u', 'l', 't') + RAPIDJSON_STRING_(Ref, '$', 'r', 'e', 'f') + RAPIDJSON_STRING_(Id, 'i', 'd') + + RAPIDJSON_STRING_(SchemeEnd, ':') + RAPIDJSON_STRING_(AuthStart, '/', '/') + RAPIDJSON_STRING_(QueryStart, '?') + RAPIDJSON_STRING_(FragStart, '#') + RAPIDJSON_STRING_(Slash, '/') + RAPIDJSON_STRING_(Dot, '.') #undef RAPIDJSON_STRING_ @@ -1197,7 +1232,7 @@ class Schema { out.schemas = static_cast(allocator_->Malloc(out.count * sizeof(const Schema*))); memset(out.schemas, 0, sizeof(Schema*)* out.count); for (SizeType i = 0; i < out.count; i++) - schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document); + schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document, id_); out.begin = validatorCount_; validatorCount_ += out.count; } @@ -1274,10 +1309,10 @@ class Schema { if (anyOf_.schemas) CreateSchemaValidators(context, anyOf_, false); - + if (oneOf_.schemas) CreateSchemaValidators(context, oneOf_, false); - + if (not_) context.validators[notValidatorIndex_] = context.factory.CreateSchemaValidator(*not_, false); @@ -1301,7 +1336,7 @@ class Schema { SizeType len = name.GetStringLength(); const Ch* str = name.GetString(); for (SizeType index = 0; index < propertyCount_; index++) - if (properties_[index].name.GetStringLength() == len && + if (properties_[index].name.GetStringLength() == len && (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0)) { *outIndex = index; @@ -1462,7 +1497,7 @@ class Schema { struct PatternProperty { PatternProperty() : schema(), pattern() {} - ~PatternProperty() { + ~PatternProperty() { if (pattern) { pattern->~RegexType(); AllocatorType::Free(pattern); @@ -1474,6 +1509,7 @@ class Schema { AllocatorType* allocator_; SValue uri_; + UriType id_; PointerType pointer_; const SchemaType* typeless_; uint64_t* enum_; @@ -1516,7 +1552,7 @@ class Schema { SValue multipleOf_; bool exclusiveMinimum_; bool exclusiveMaximum_; - + SizeType defaultValueLength_; }; @@ -1559,9 +1595,12 @@ template class IGenericRemoteSchemaDocumentProvider { public: typedef typename SchemaDocumentType::Ch Ch; + typedef typename SchemaDocumentType::ValueType ValueType; + typedef typename SchemaDocumentType::AllocatorType AllocatorType; virtual ~IGenericRemoteSchemaDocumentProvider() {} virtual const SchemaDocumentType* GetRemoteDocument(const Ch* uri, SizeType length) = 0; + virtual const SchemaDocumentType* GetRemoteDocument(GenericUri uri) { return GetRemoteDocument(uri.GetBaseString(), uri.GetBaseStringLength()); } }; /////////////////////////////////////////////////////////////////////////////// @@ -1586,7 +1625,8 @@ class GenericSchemaDocument { typedef typename EncodingType::Ch Ch; typedef internal::Schema SchemaType; typedef GenericPointer PointerType; - typedef GenericValue URIType; + typedef GenericValue SValue; + typedef GenericUri UriType; friend class internal::Schema; template friend class GenericSchemaValidator; @@ -1600,9 +1640,11 @@ class GenericSchemaDocument { \param uriLength Length of \c name, in code points. \param remoteProvider An optional remote schema document provider for resolving remote reference. Can be null. \param allocator An optional allocator instance for allocating memory. Can be null. + \param pointer An optional JSON pointer to the start of the schema document */ explicit GenericSchemaDocument(const ValueType& document, const Ch* uri = 0, SizeType uriLength = 0, - IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0) : + IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0, + const PointerType& pointer = PointerType()) : // PR #1393 remoteProvider_(remoteProvider), allocator_(allocator), ownAllocator_(), @@ -1616,30 +1658,20 @@ class GenericSchemaDocument { Ch noUri[1] = {0}; uri_.SetString(uri ? uri : noUri, uriLength, *allocator_); + docId_ = UriType(uri_, allocator_); typeless_ = static_cast(allocator_->Malloc(sizeof(SchemaType))); - new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), allocator_); + new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), allocator_, docId_); // Generate root schema, it will call CreateSchema() to create sub-schemas, - // And call AddRefSchema() if there are $ref. - CreateSchemaRecursive(&root_, PointerType(), document, document); - - // Resolve $ref - while (!schemaRef_.Empty()) { - SchemaRefEntry* refEntry = schemaRef_.template Pop(1); - if (const SchemaType* s = GetSchema(refEntry->target)) { - if (refEntry->schema) - *refEntry->schema = s; - - // Create entry in map if not exist - if (!GetSchema(refEntry->source)) { - new (schemaMap_.template Push()) SchemaEntry(refEntry->source, const_cast(s), false, allocator_); - } - } - else if (refEntry->schema) - *refEntry->schema = typeless_; - - refEntry->~SchemaRefEntry(); + // And call HandleRefSchema() if there are $ref. + // PR #1393 use input pointer if supplied + root_ = typeless_; + if (pointer.GetTokenCount() == 0) { + CreateSchemaRecursive(&root_, pointer, document, document, docId_); + } + else if (const ValueType* v = pointer.Get(document)) { + CreateSchema(&root_, pointer, *v, document, docId_); } RAPIDJSON_ASSERT(root_ != 0); @@ -1657,7 +1689,8 @@ class GenericSchemaDocument { typeless_(rhs.typeless_), schemaMap_(std::move(rhs.schemaMap_)), schemaRef_(std::move(rhs.schemaRef_)), - uri_(std::move(rhs.uri_)) + uri_(std::move(rhs.uri_)), + docId_(rhs.docId_) { rhs.remoteProvider_ = 0; rhs.allocator_ = 0; @@ -1679,7 +1712,7 @@ class GenericSchemaDocument { RAPIDJSON_DELETE(ownAllocator_); } - const URIType& GetURI() const { return uri_; } + const SValue& GetURI() const { return uri_; } //! Get the root schema. const SchemaType& GetRoot() const { return *root_; } @@ -1690,12 +1723,7 @@ class GenericSchemaDocument { //! Prohibit assignment GenericSchemaDocument& operator=(const GenericSchemaDocument&); - struct SchemaRefEntry { - SchemaRefEntry(const PointerType& s, const PointerType& t, const SchemaType** outSchema, Allocator *allocator) : source(s, allocator), target(t, allocator), schema(outSchema) {} - PointerType source; - PointerType target; - const SchemaType** schema; - }; + typedef const PointerType* SchemaRefPtr; // PR #1393 struct SchemaEntry { SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator) : pointer(p, allocator), schema(s), owned(o) {} @@ -1710,79 +1738,197 @@ class GenericSchemaDocument { bool owned; }; - void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { - if (schema) - *schema = typeless_; - + // Changed by PR #1393 + void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document, const UriType& id) { if (v.GetType() == kObjectType) { - const SchemaType* s = GetSchema(pointer); - if (!s) - CreateSchema(schema, pointer, v, document); + UriType newid = UriType(CreateSchema(schema, pointer, v, document, id), allocator_); for (typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd(); ++itr) - CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document); + CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document, newid); } else if (v.GetType() == kArrayType) for (SizeType i = 0; i < v.Size(); i++) - CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document); + CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document, id); } - void CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) { + // Changed by PR #1393 + const UriType& CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document, const UriType& id) { RAPIDJSON_ASSERT(pointer.IsValid()); if (v.IsObject()) { - if (!HandleRefSchema(pointer, schema, v, document)) { - SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_); - new (schemaMap_.template Push()) SchemaEntry(pointer, s, true, allocator_); + if (const SchemaType* sc = GetSchema(pointer)) { + if (schema) + *schema = sc; + AddSchemaRefs(const_cast(sc)); + } + else if (!HandleRefSchema(pointer, schema, v, document, id)) { + // The new schema constructor adds itself and its $ref(s) to schemaMap_ + SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_, id); if (schema) *schema = s; + return s->GetId(); } } + else { + if (schema) + *schema = typeless_; + AddSchemaRefs(typeless_); + } + return id; } - bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document) { - static const Ch kRefString[] = { '$', 'r', 'e', 'f', '\0' }; - static const ValueType kRefValue(kRefString, 4); - - typename ValueType::ConstMemberIterator itr = v.FindMember(kRefValue); + // Changed by PR #1393 + // TODO should this return a UriType& ? + bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document, const UriType& id) { + typename ValueType::ConstMemberIterator itr = v.FindMember(SchemaType::GetRefString()); if (itr == v.MemberEnd()) return false; + // Resolve the source pointer to the $ref'ed schema (finally) + new (schemaRef_.template Push()) SchemaRefPtr(&source); + if (itr->value.IsString()) { SizeType len = itr->value.GetStringLength(); if (len > 0) { - const Ch* s = itr->value.GetString(); - SizeType i = 0; - while (i < len && s[i] != '#') // Find the first # - i++; - - if (i > 0) { // Remote reference, resolve immediately + // First resolve $ref against the in-scope id + UriType scopeId = UriType(id, allocator_); + UriType ref = UriType(itr->value, allocator_).Resolve(scopeId, allocator_); + // See if the resolved $ref minus the fragment matches a resolved id in this document + // Search from the root. Returns the subschema in the document and its absolute JSON pointer. + PointerType basePointer = PointerType(); + const ValueType *base = FindId(document, ref, basePointer, docId_, false); + if (!base) { + // Remote reference - call the remote document provider if (remoteProvider_) { - if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(s, i)) { - PointerType pointer(&s[i], len - i, allocator_); - if (pointer.IsValid()) { - if (const SchemaType* sc = remoteDocument->GetSchema(pointer)) { - if (schema) - *schema = sc; - new (schemaMap_.template Push()) SchemaEntry(source, const_cast(sc), false, allocator_); + if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(ref)) { + const Ch* s = ref.GetFragString(); + len = ref.GetFragStringLength(); + if (len <= 1 || s[1] == '/') { + // JSON pointer fragment, absolute in the remote schema + const PointerType pointer(s, len, allocator_); + if (pointer.IsValid()) { + // Get the subschema + if (const SchemaType *sc = remoteDocument->GetSchema(pointer)) { + if (schema) + *schema = sc; + AddSchemaRefs(const_cast(sc)); + return true; + } + } + } else { + // Plain name fragment, not allowed + } + } + } + } + else { // Local reference + const Ch* s = ref.GetFragString(); + len = ref.GetFragStringLength(); + if (len <= 1 || s[1] == '/') { + // JSON pointer fragment, relative to the resolved URI + const PointerType relPointer(s, len, allocator_); + if (relPointer.IsValid()) { + // Get the subschema + if (const ValueType *pv = relPointer.Get(*base)) { + // Now get the absolute JSON pointer by adding relative to base + PointerType pointer(basePointer); + for (SizeType i = 0; i < relPointer.GetTokenCount(); i++) + pointer = pointer.Append(relPointer.GetTokens()[i], allocator_); + //GenericStringBuffer sb; + //pointer.StringifyUriFragment(sb); + if (pointer.IsValid() && !IsCyclicRef(pointer)) { + // Call CreateSchema recursively, but first compute the in-scope id for the $ref target as we have jumped there + // TODO: cache pointer <-> id mapping + size_t unresolvedTokenIndex; + scopeId = pointer.GetUri(document, docId_, &unresolvedTokenIndex, allocator_); + CreateSchema(schema, pointer, *pv, document, scopeId); return true; } } } + } else { + // Plain name fragment, relative to the resolved URI + // See if the fragment matches an id in this document. + // Search from the base we just established. Returns the subschema in the document and its absolute JSON pointer. + PointerType pointer = PointerType(); + if (const ValueType *pv = FindId(*base, ref, pointer, UriType(ref.GetBaseString(), ref.GetBaseStringLength(), allocator_), true, basePointer)) { + if (!IsCyclicRef(pointer)) { + //GenericStringBuffer sb; + //pointer.StringifyUriFragment(sb); + // Call CreateSchema recursively, but first compute the in-scope id for the $ref target as we have jumped there + // TODO: cache pointer <-> id mapping + size_t unresolvedTokenIndex; + scopeId = pointer.GetUri(document, docId_, &unresolvedTokenIndex, allocator_); + CreateSchema(schema, pointer, *pv, document, scopeId); + return true; + } + } } } - else if (s[i] == '#') { // Local reference, defer resolution - PointerType pointer(&s[i], len - i, allocator_); - if (pointer.IsValid()) { - if (const ValueType* nv = pointer.Get(document)) - if (HandleRefSchema(source, schema, *nv, document)) - return true; + } + } - new (schemaRef_.template Push()) SchemaRefEntry(source, pointer, schema, allocator_); - return true; - } + // Invalid/Unknown $ref + if (schema) + *schema = typeless_; + AddSchemaRefs(typeless_); + return true; + } + + //! Find the first subschema with a resolved 'id' that matches the specified URI. + // If full specified use all URI else ignore fragment. + // If found, return a pointer to the subschema and its JSON pointer. + // TODO cache pointer <-> id mapping + ValueType* FindId(const ValueType& doc, const UriType& finduri, PointerType& resptr, const UriType& baseuri, bool full, const PointerType& here = PointerType()) const { + SizeType i = 0; + ValueType* resval = 0; + UriType tempuri = UriType(finduri, allocator_); + UriType localuri = UriType(baseuri, allocator_); + if (doc.GetType() == kObjectType) { + // Establish the base URI of this object + typename ValueType::ConstMemberIterator m = doc.FindMember(SchemaType::GetIdString()); + if (m != doc.MemberEnd() && m->value.GetType() == kStringType) { + localuri = UriType(m->value, allocator_).Resolve(baseuri, allocator_); + } + // See if it matches + if (localuri.Match(finduri, full)) { + resval = const_cast(&doc); + resptr = here; + return resval; + } + // No match, continue looking + for (m = doc.MemberBegin(); m != doc.MemberEnd(); ++m) { + if (m->value.GetType() == kObjectType || m->value.GetType() == kArrayType) { + resval = FindId(m->value, finduri, resptr, localuri, full, here.Append(m->name.GetString(), m->name.GetStringLength(), allocator_)); } + if (resval) break; + } + } else if (doc.GetType() == kArrayType) { + // Continue looking + for (typename ValueType::ConstValueIterator v = doc.Begin(); v != doc.End(); ++v) { + if (v->GetType() == kObjectType || v->GetType() == kArrayType) { + resval = FindId(*v, finduri, resptr, localuri, full, here.Append(i, allocator_)); + } + if (resval) break; + i++; } } + return resval; + } + + // Added by PR #1393 + void AddSchemaRefs(SchemaType* schema) { + while (!schemaRef_.Empty()) { + SchemaRefPtr *ref = schemaRef_.template Pop(1); + SchemaEntry *entry = schemaMap_.template Push(); + new (entry) SchemaEntry(**ref, schema, false, allocator_); + } + } + + // Added by PR #1393 + bool IsCyclicRef(const PointerType& pointer) const { + for (const SchemaRefPtr* ref = schemaRef_.template Bottom(); ref != schemaRef_.template End(); ++ref) + if (pointer == **ref) + return true; return false; } @@ -1811,8 +1957,9 @@ class GenericSchemaDocument { const SchemaType* root_; //!< Root schema. SchemaType* typeless_; internal::Stack schemaMap_; // Stores created Pointer -> Schemas - internal::Stack schemaRef_; // Stores Pointer from $ref and schema which holds the $ref - URIType uri_; + internal::Stack schemaRef_; // Stores Pointer(s) from $ref(s) until resolved + SValue uri_; // Schema document URI + UriType docId_; }; //! GenericSchemaDocument using Value type. diff --git a/rapidjson/uri.h b/rapidjson/uri.h new file mode 100644 index 0000000..f93e508 --- /dev/null +++ b/rapidjson/uri.h @@ -0,0 +1,481 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// (C) Copyright IBM Corporation 2021 +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_URI_H_ +#define RAPIDJSON_URI_H_ + +#include "internal/strfunc.h" + +#if defined(__clang__) +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(c++98-compat) +#elif defined(_MSC_VER) +RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated +#endif + +RAPIDJSON_NAMESPACE_BEGIN + +/////////////////////////////////////////////////////////////////////////////// +// GenericUri + +template +class GenericUri { +public: + typedef typename ValueType::Ch Ch; +#if RAPIDJSON_HAS_STDSTRING + typedef std::basic_string String; +#endif + + //! Constructors + GenericUri(Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { + } + + GenericUri(const Ch* uri, SizeType len, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { + Parse(uri, len); + } + + GenericUri(const Ch* uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { + Parse(uri, internal::StrLen(uri)); + } + + // Use with specializations of GenericValue + template GenericUri(const T& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { + const Ch* u = uri.template Get(); // TypeHelper from document.h + Parse(u, internal::StrLen(u)); + } + +#if RAPIDJSON_HAS_STDSTRING + GenericUri(const String& uri, Allocator* allocator = 0) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { + Parse(uri.c_str(), internal::StrLen(uri.c_str())); + } +#endif + + //! Copy constructor + GenericUri(const GenericUri& rhs) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(), ownAllocator_() { + *this = rhs; + } + + //! Copy constructor + GenericUri(const GenericUri& rhs, Allocator* allocator) : uri_(), base_(), scheme_(), auth_(), path_(), query_(), frag_(), allocator_(allocator), ownAllocator_() { + *this = rhs; + } + + //! Destructor. + ~GenericUri() { + Free(); + RAPIDJSON_DELETE(ownAllocator_); + } + + //! Assignment operator + GenericUri& operator=(const GenericUri& rhs) { + if (this != &rhs) { + // Do not delete ownAllocator + Free(); + Allocate(rhs.GetStringLength()); + auth_ = CopyPart(scheme_, rhs.scheme_, rhs.GetSchemeStringLength()); + path_ = CopyPart(auth_, rhs.auth_, rhs.GetAuthStringLength()); + query_ = CopyPart(path_, rhs.path_, rhs.GetPathStringLength()); + frag_ = CopyPart(query_, rhs.query_, rhs.GetQueryStringLength()); + base_ = CopyPart(frag_, rhs.frag_, rhs.GetFragStringLength()); + uri_ = CopyPart(base_, rhs.base_, rhs.GetBaseStringLength()); + CopyPart(uri_, rhs.uri_, rhs.GetStringLength()); + } + return *this; + } + + //! Getters + // Use with specializations of GenericValue + template void Get(T& uri, Allocator& allocator) { + uri.template Set(this->GetString(), allocator); // TypeHelper from document.h + } + + const Ch* GetString() const { return uri_; } + SizeType GetStringLength() const { return uri_ == 0 ? 0 : internal::StrLen(uri_); } + const Ch* GetBaseString() const { return base_; } + SizeType GetBaseStringLength() const { return base_ == 0 ? 0 : internal::StrLen(base_); } + const Ch* GetSchemeString() const { return scheme_; } + SizeType GetSchemeStringLength() const { return scheme_ == 0 ? 0 : internal::StrLen(scheme_); } + const Ch* GetAuthString() const { return auth_; } + SizeType GetAuthStringLength() const { return auth_ == 0 ? 0 : internal::StrLen(auth_); } + const Ch* GetPathString() const { return path_; } + SizeType GetPathStringLength() const { return path_ == 0 ? 0 : internal::StrLen(path_); } + const Ch* GetQueryString() const { return query_; } + SizeType GetQueryStringLength() const { return query_ == 0 ? 0 : internal::StrLen(query_); } + const Ch* GetFragString() const { return frag_; } + SizeType GetFragStringLength() const { return frag_ == 0 ? 0 : internal::StrLen(frag_); } + +#if RAPIDJSON_HAS_STDSTRING + static String Get(const GenericUri& uri) { return String(uri.GetString(), uri.GetStringLength()); } + static String GetBase(const GenericUri& uri) { return String(uri.GetBaseString(), uri.GetBaseStringLength()); } + static String GetScheme(const GenericUri& uri) { return String(uri.GetSchemeString(), uri.GetSchemeStringLength()); } + static String GetAuth(const GenericUri& uri) { return String(uri.GetAuthString(), uri.GetAuthStringLength()); } + static String GetPath(const GenericUri& uri) { return String(uri.GetPathString(), uri.GetPathStringLength()); } + static String GetQuery(const GenericUri& uri) { return String(uri.GetQueryString(), uri.GetQueryStringLength()); } + static String GetFrag(const GenericUri& uri) { return String(uri.GetFragString(), uri.GetFragStringLength()); } +#endif + + //! Equality operators + bool operator==(const GenericUri& rhs) const { + return Match(rhs, true); + } + + bool operator!=(const GenericUri& rhs) const { + return !Match(rhs, true); + } + + bool Match(const GenericUri& uri, bool full = true) const { + Ch* s1; + Ch* s2; + if (full) { + s1 = uri_; + s2 = uri.uri_; + } else { + s1 = base_; + s2 = uri.base_; + } + if (s1 == s2) return true; + if (s1 == 0 || s2 == 0) return false; + return internal::StrCmp(s1, s2) == 0; + } + + //! Resolve this URI against another (base) URI in accordance with URI resolution rules. + // See https://tools.ietf.org/html/rfc3986 + // Use for resolving an id or $ref with an in-scope id. + // Returns a new GenericUri for the resolved URI. + GenericUri Resolve(const GenericUri& baseuri, Allocator* allocator = 0) { + GenericUri resuri; + resuri.allocator_ = allocator; + // Ensure enough space for combining paths + resuri.Allocate(GetStringLength() + baseuri.GetStringLength() + 1); // + 1 for joining slash + + if (!(GetSchemeStringLength() == 0)) { + // Use all of this URI + resuri.auth_ = CopyPart(resuri.scheme_, scheme_, GetSchemeStringLength()); + resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength()); + resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength()); + resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); + resuri.RemoveDotSegments(); + } else { + // Use the base scheme + resuri.auth_ = CopyPart(resuri.scheme_, baseuri.scheme_, baseuri.GetSchemeStringLength()); + if (!(GetAuthStringLength() == 0)) { + // Use this auth, path, query + resuri.path_ = CopyPart(resuri.auth_, auth_, GetAuthStringLength()); + resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength()); + resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); + resuri.RemoveDotSegments(); + } else { + // Use the base auth + resuri.path_ = CopyPart(resuri.auth_, baseuri.auth_, baseuri.GetAuthStringLength()); + if (GetPathStringLength() == 0) { + // Use the base path + resuri.query_ = CopyPart(resuri.path_, baseuri.path_, baseuri.GetPathStringLength()); + if (GetQueryStringLength() == 0) { + // Use the base query + resuri.frag_ = CopyPart(resuri.query_, baseuri.query_, baseuri.GetQueryStringLength()); + } else { + // Use this query + resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); + } + } else { + if (path_[0] == '/') { + // Absolute path - use all of this path + resuri.query_ = CopyPart(resuri.path_, path_, GetPathStringLength()); + resuri.RemoveDotSegments(); + } else { + // Relative path - append this path to base path after base path's last slash + size_t pos = 0; + if (!(baseuri.GetAuthStringLength() == 0) && baseuri.GetPathStringLength() == 0) { + resuri.path_[pos] = '/'; + pos++; + } + size_t lastslashpos = baseuri.GetPathStringLength(); + while (lastslashpos > 0) { + if (baseuri.path_[lastslashpos - 1] == '/') break; + lastslashpos--; + } + std::memcpy(&resuri.path_[pos], baseuri.path_, lastslashpos * sizeof(Ch)); + pos += lastslashpos; + resuri.query_ = CopyPart(&resuri.path_[pos], path_, GetPathStringLength()); + resuri.RemoveDotSegments(); + } + // Use this query + resuri.frag_ = CopyPart(resuri.query_, query_, GetQueryStringLength()); + } + } + } + // Always use this frag + resuri.base_ = CopyPart(resuri.frag_, frag_, GetFragStringLength()); + + // Re-constitute base_ and uri_ + resuri.SetBase(); + resuri.uri_ = resuri.base_ + resuri.GetBaseStringLength() + 1; + resuri.SetUri(); + return resuri; + } + + //! Get the allocator of this GenericUri. + Allocator& GetAllocator() { return *allocator_; } + +private: + // Allocate memory for a URI + // Returns total amount allocated + std::size_t Allocate(std::size_t len) { + // Create own allocator if user did not supply. + if (!allocator_) + ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); + + // Allocate one block containing each part of the URI (5) plus base plus full URI, all null terminated. + // Order: scheme, auth, path, query, frag, base, uri + // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. + size_t total = (3 * len + 7) * sizeof(Ch); + scheme_ = static_cast(allocator_->Malloc(total)); + *scheme_ = '\0'; + auth_ = scheme_; + auth_++; + *auth_ = '\0'; + path_ = auth_; + path_++; + *path_ = '\0'; + query_ = path_; + query_++; + *query_ = '\0'; + frag_ = query_; + frag_++; + *frag_ = '\0'; + base_ = frag_; + base_++; + *base_ = '\0'; + uri_ = base_; + uri_++; + *uri_ = '\0'; + return total; + } + + // Free memory for a URI + void Free() { + if (scheme_) { + Allocator::Free(scheme_); + scheme_ = 0; + } + } + + // Parse a URI into constituent scheme, authority, path, query, & fragment parts + // Supports URIs that match regex ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? as per + // https://tools.ietf.org/html/rfc3986 + void Parse(const Ch* uri, std::size_t len) { + std::size_t start = 0, pos1 = 0, pos2 = 0; + Allocate(len); + + // Look for scheme ([^:/?#]+):)? + if (start < len) { + while (pos1 < len) { + if (uri[pos1] == ':') break; + pos1++; + } + if (pos1 != len) { + while (pos2 < len) { + if (uri[pos2] == '/') break; + if (uri[pos2] == '?') break; + if (uri[pos2] == '#') break; + pos2++; + } + if (pos1 < pos2) { + pos1++; + std::memcpy(scheme_, &uri[start], pos1 * sizeof(Ch)); + scheme_[pos1] = '\0'; + start = pos1; + } + } + } + // Look for auth (//([^/?#]*))? + // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. + auth_ = scheme_ + GetSchemeStringLength(); + auth_++; + *auth_ = '\0'; + if (start < len - 1 && uri[start] == '/' && uri[start + 1] == '/') { + pos2 = start + 2; + while (pos2 < len) { + if (uri[pos2] == '/') break; + if (uri[pos2] == '?') break; + if (uri[pos2] == '#') break; + pos2++; + } + std::memcpy(auth_, &uri[start], (pos2 - start) * sizeof(Ch)); + auth_[pos2 - start] = '\0'; + start = pos2; + } + // Look for path ([^?#]*) + // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. + path_ = auth_ + GetAuthStringLength(); + path_++; + *path_ = '\0'; + if (start < len) { + pos2 = start; + while (pos2 < len) { + if (uri[pos2] == '?') break; + if (uri[pos2] == '#') break; + pos2++; + } + if (start != pos2) { + std::memcpy(path_, &uri[start], (pos2 - start) * sizeof(Ch)); + path_[pos2 - start] = '\0'; + if (path_[0] == '/') + RemoveDotSegments(); // absolute path - normalize + start = pos2; + } + } + // Look for query (\?([^#]*))? + // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. + query_ = path_ + GetPathStringLength(); + query_++; + *query_ = '\0'; + if (start < len && uri[start] == '?') { + pos2 = start + 1; + while (pos2 < len) { + if (uri[pos2] == '#') break; + pos2++; + } + if (start != pos2) { + std::memcpy(query_, &uri[start], (pos2 - start) * sizeof(Ch)); + query_[pos2 - start] = '\0'; + start = pos2; + } + } + // Look for fragment (#(.*))? + // Note need to set, increment, assign in 3 stages to avoid compiler warning bug. + frag_ = query_ + GetQueryStringLength(); + frag_++; + *frag_ = '\0'; + if (start < len && uri[start] == '#') { + std::memcpy(frag_, &uri[start], (len - start) * sizeof(Ch)); + frag_[len - start] = '\0'; + } + + // Re-constitute base_ and uri_ + base_ = frag_ + GetFragStringLength() + 1; + SetBase(); + uri_ = base_ + GetBaseStringLength() + 1; + SetUri(); + } + + // Reconstitute base + void SetBase() { + Ch* next = base_; + std::memcpy(next, scheme_, GetSchemeStringLength() * sizeof(Ch)); + next+= GetSchemeStringLength(); + std::memcpy(next, auth_, GetAuthStringLength() * sizeof(Ch)); + next+= GetAuthStringLength(); + std::memcpy(next, path_, GetPathStringLength() * sizeof(Ch)); + next+= GetPathStringLength(); + std::memcpy(next, query_, GetQueryStringLength() * sizeof(Ch)); + next+= GetQueryStringLength(); + *next = '\0'; + } + + // Reconstitute uri + void SetUri() { + Ch* next = uri_; + std::memcpy(next, base_, GetBaseStringLength() * sizeof(Ch)); + next+= GetBaseStringLength(); + std::memcpy(next, frag_, GetFragStringLength() * sizeof(Ch)); + next+= GetFragStringLength(); + *next = '\0'; + } + + // Copy a part from one GenericUri to another + // Return the pointer to the next part to be copied to + Ch* CopyPart(Ch* to, Ch* from, std::size_t len) { + RAPIDJSON_ASSERT(to != 0); + RAPIDJSON_ASSERT(from != 0); + std::memcpy(to, from, len * sizeof(Ch)); + to[len] = '\0'; + Ch* next = to + len + 1; + return next; + } + + // Remove . and .. segments from the path_ member. + // https://tools.ietf.org/html/rfc3986 + // This is done in place as we are only removing segments. + void RemoveDotSegments() { + std::size_t pathlen = GetPathStringLength(); + std::size_t pathpos = 0; // Position in path_ + std::size_t newpos = 0; // Position in new path_ + + // Loop through each segment in original path_ + while (pathpos < pathlen) { + // Get next segment, bounded by '/' or end + size_t slashpos = 0; + while ((pathpos + slashpos) < pathlen) { + if (path_[pathpos + slashpos] == '/') break; + slashpos++; + } + // Check for .. and . segments + if (slashpos == 2 && path_[pathpos] == '.' && path_[pathpos + 1] == '.') { + // Backup a .. segment in the new path_ + // We expect to find a previously added slash at the end or nothing + RAPIDJSON_ASSERT(newpos == 0 || path_[newpos - 1] == '/'); + size_t lastslashpos = newpos; + // Make sure we don't go beyond the start segment + if (lastslashpos > 1) { + // Find the next to last slash and back up to it + lastslashpos--; + while (lastslashpos > 0) { + if (path_[lastslashpos - 1] == '/') break; + lastslashpos--; + } + // Set the new path_ position + newpos = lastslashpos; + } + } else if (slashpos == 1 && path_[pathpos] == '.') { + // Discard . segment, leaves new path_ unchanged + } else { + // Move any other kind of segment to the new path_ + RAPIDJSON_ASSERT(newpos <= pathpos); + std::memmove(&path_[newpos], &path_[pathpos], slashpos * sizeof(Ch)); + newpos += slashpos; + // Add slash if not at end + if ((pathpos + slashpos) < pathlen) { + path_[newpos] = '/'; + newpos++; + } + } + // Move to next segment + pathpos += slashpos + 1; + } + path_[newpos] = '\0'; + } + + Ch* uri_; // Everything + Ch* base_; // Everything except fragment + Ch* scheme_; // Includes the : + Ch* auth_; // Includes the // + Ch* path_; // Absolute if starts with / + Ch* query_; // Includes the ? + Ch* frag_; // Includes the # + + Allocator* allocator_; //!< The current allocator. It is either user-supplied or equal to ownAllocator_. + Allocator* ownAllocator_; //!< Allocator owned by this Uri. +}; + +//! GenericUri for Value (UTF-8, default allocator). +typedef GenericUri Uri; + +RAPIDJSON_NAMESPACE_END + +#if defined(__clang__) +RAPIDJSON_DIAG_POP +#endif + +#endif // RAPIDJSON_URI_H_