From 5255f92f063ea0528d5195c3c12356567998be64 Mon Sep 17 00:00:00 2001 From: Julien Marrec Date: Thu, 20 Jun 2024 11:51:54 +0200 Subject: [PATCH] Fix #10322 - Bump CLI11 from 2.3.2 to 2.4.2 Turns out the issue is that I rely on CLI11's (circa 2.3.2) interpretation of argc/argv, which for linux reads `/proc/self/cmdline` separated by null terminators. https://github.com/CLIUtils/CLI11/blob/985a19f3860be0cba87354336f6588494b20111c/include/CLI/impl/Argv_inl.hpp#L141 When you run under rosetta, you end up with a duplicated program path, looks like rosetta is trying to remove itself from the command line but duplicating it. So wheen you call `energyplus --help` what cmdline has is `energyplus energyplus --help`. This bumps to CLI11 to 2.4.2, and moves back to using the actual main argc/argv instead of relying on the `static_args` that reads `/proc/self/cmdline` Note: I suppose I could also have achieved the same thing without bumping to 2.4.2, but might as well do it since there were numeros unicode related fixes as well --- src/ConvertInputFormat/main.cpp | 5 +- src/EnergyPlus/main.cc | 9 +- third_party/CLI/CLI11.hpp | 1514 +++++++++++++++++++++++-------- third_party/CLI/README.md | 3 +- 4 files changed, 1126 insertions(+), 405 deletions(-) diff --git a/src/ConvertInputFormat/main.cpp b/src/ConvertInputFormat/main.cpp index d53e6d81790..51c911434aa 100644 --- a/src/ConvertInputFormat/main.cpp +++ b/src/ConvertInputFormat/main.cpp @@ -436,10 +436,11 @@ std::vector parse_input_paths(fs::path const &inputFilePath) return input_paths; } -int main(/** [[maybe_unused]] int argc, [[maybe_unused]] const char *argv[] */) +int main(int argc, char **argv) { CLI::App app{"ConvertInputFormat"}; + argv = app.ensure_utf8(argv); app.description("Run input file conversion tool"); app.set_version_flag("-v,--version", EnergyPlus::DataStringGlobals::VerString); @@ -502,7 +503,7 @@ Select one (case insensitive): // We are not modifying argc/argv, so we defer to CLI11 to find the argc/argv instead. It'll use GetCommandLineW & CommandLineToArgvW on windows try { - app.parse(); + app.parse(argc, argv); } catch (const CLI::ParseError &e) { return app.exit(e); } diff --git a/src/EnergyPlus/main.cc b/src/EnergyPlus/main.cc index 7fe25b07e36..b7ee17b6659 100644 --- a/src/EnergyPlus/main.cc +++ b/src/EnergyPlus/main.cc @@ -54,15 +54,16 @@ #include #endif -int main() +int main(int argc, char **argv) { #ifdef DEBUG_ARITHM_GCC_OR_CLANG feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); #endif - int const argc = CLI::argc(); - const char *const *argv = CLI::argv(); // This is going to use CommandLineToArgvW on Windows and **narrow** from wchar_t to char - +#ifdef _WIN32 + const std::vector args = CLI::detail::compute_win32_argv(); +#else const std::vector args(argv, std::next(argv, static_cast(argc))); +#endif return EnergyPlusPgm(args); } diff --git a/third_party/CLI/CLI11.hpp b/third_party/CLI/CLI11.hpp index e2437d5e8f0..8a5b4c54474 100644 --- a/third_party/CLI/CLI11.hpp +++ b/third_party/CLI/CLI11.hpp @@ -1,11 +1,11 @@ -// CLI11: Version 2.3.2 +// CLI11: Version 2.4.2 // Originally designed by Henry Schreiner // https://github.com/CLIUtils/CLI11 // // This is a standalone header file generated by MakeSingleHeader.py in CLI11/scripts -// from: v2.3.2-34-g985a19f +// from: v2.4.2 // -// CLI11 2.3.2 Copyright (c) 2017-2023 University of Cincinnati, developed by Henry +// CLI11 2.4.2 Copyright (c) 2017-2024 University of Cincinnati, developed by Henry // Schreiner under NSF AWARD 1414736. All rights reserved. // // Redistribution and use in source and binary forms of CLI11, with or without @@ -65,9 +65,9 @@ #define CLI11_VERSION_MAJOR 2 -#define CLI11_VERSION_MINOR 3 +#define CLI11_VERSION_MINOR 4 #define CLI11_VERSION_PATCH 2 -#define CLI11_VERSION "2.3.2" +#define CLI11_VERSION "2.4.2" @@ -225,6 +225,10 @@ #define _X86_ #elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARMT) #define _ARM_ +#elif defined(__aarch64__) || defined(_M_ARM64) +#define _ARM64_ +#elif defined(_M_ARM64EC) +#define _ARM64EC_ #endif #endif @@ -243,9 +247,6 @@ // third #include #include - -#elif defined(__APPLE__) -#include #endif @@ -402,138 +403,42 @@ CLI11_INLINE std::filesystem::path to_path(std::string_view str) { -/// argc as passed in to this executable. -CLI11_INLINE int argc(); - -/// argv as passed in to this executable, converted to utf-8 on Windows. -CLI11_INLINE const char *const *argv(); - - - - namespace detail { - -#ifdef __APPLE__ -// Copy argc and argv as early as possible to avoid modification -static const std::vector static_args = [] { - static const std::vector static_args_as_strings = [] { - std::vector args_as_strings; - int argc = *_NSGetArgc(); - char **argv = *_NSGetArgv(); - - args_as_strings.reserve(static_cast(argc)); - for(size_t i = 0; i < static_cast(argc); i++) { - args_as_strings.push_back(argv[i]); - } - - return args_as_strings; - }(); - - std::vector static_args_result; - static_args_result.reserve(static_args_as_strings.size()); - - for(const auto &arg : static_args_as_strings) { - static_args_result.push_back(arg.data()); - } - - return static_args_result; -}(); -#endif - -/// Command-line arguments, as passed in to this executable, converted to utf-8 on Windows. -CLI11_INLINE const std::vector &args() { - // This function uses initialization via lambdas extensively to take advantage of the thread safety of static - // variable initialization [stmt.dcl.3] - #ifdef _WIN32 - static const std::vector static_args = [] { - static const std::vector static_args_as_strings = [] { - // On Windows, take arguments from GetCommandLineW and convert them to utf-8. - std::vector args_as_strings; - int argc = 0; - - auto deleter = [](wchar_t **ptr) { LocalFree(ptr); }; - // NOLINTBEGIN(*-avoid-c-arrays) - auto wargv = - std::unique_ptr(CommandLineToArgvW(GetCommandLineW(), &argc), deleter); - // NOLINTEND(*-avoid-c-arrays) - - if(wargv == nullptr) { - throw std::runtime_error("CommandLineToArgvW failed with code " + std::to_string(GetLastError())); - } - - args_as_strings.reserve(static_cast(argc)); - for(size_t i = 0; i < static_cast(argc); ++i) { - args_as_strings.push_back(narrow(wargv[i])); - } - - return args_as_strings; - }(); - - std::vector static_args_result; - static_args_result.reserve(static_args_as_strings.size()); - - for(const auto &arg : static_args_as_strings) { - static_args_result.push_back(arg.data()); - } - - return static_args_result; - }(); - - return static_args; - -#elif defined(__APPLE__) - - return static_args; - -#else - static const std::vector static_args = [] { - static const std::vector static_cmdline = [] { - // On posix, retrieve arguments from /proc/self/cmdline, separated by null terminators. - std::vector cmdline; +/// Decode and return UTF-8 argv from GetCommandLineW. +CLI11_INLINE std::vector compute_win32_argv(); +#endif +} // namespace detail - auto deleter = [](FILE *f) { std::fclose(f); }; - std::unique_ptr fp_unique(std::fopen("/proc/self/cmdline", "r"), deleter); - FILE *fp = fp_unique.get(); - if(!fp) { - throw std::runtime_error("could not open /proc/self/cmdline for reading"); // LCOV_EXCL_LINE - } - size_t size = 0; - while(std::feof(fp) == 0) { - cmdline.resize(size + 128); - size += std::fread(cmdline.data() + size, 1, 128, fp); - if(std::ferror(fp) != 0) { - throw std::runtime_error("error during reading /proc/self/cmdline"); // LCOV_EXCL_LINE - } - } - cmdline.resize(size); +namespace detail { - return cmdline; - }(); +#ifdef _WIN32 +CLI11_INLINE std::vector compute_win32_argv() { + std::vector result; + int argc = 0; - std::size_t argc = static_cast(std::count(static_cmdline.begin(), static_cmdline.end(), '\0')); - std::vector static_args_result; - static_args_result.reserve(argc); + auto deleter = [](wchar_t **ptr) { LocalFree(ptr); }; + // NOLINTBEGIN(*-avoid-c-arrays) + auto wargv = std::unique_ptr(CommandLineToArgvW(GetCommandLineW(), &argc), deleter); + // NOLINTEND(*-avoid-c-arrays) - for(auto it = static_cmdline.begin(); it != static_cmdline.end(); - it = std::find(it, static_cmdline.end(), '\0') + 1) { - static_args_result.push_back(static_cmdline.data() + (it - static_cmdline.begin())); - } + if(wargv == nullptr) { + throw std::runtime_error("CommandLineToArgvW failed with code " + std::to_string(GetLastError())); + } - return static_args_result; - }(); + result.reserve(static_cast(argc)); + for(size_t i = 0; i < static_cast(argc); ++i) { + result.push_back(narrow(wargv[i])); + } - return static_args; -#endif + return result; } +#endif } // namespace detail -CLI11_INLINE const char *const *argv() { return detail::args().data(); } -CLI11_INLINE int argc() { return static_cast(detail::args().size()); } - @@ -634,6 +539,9 @@ inline std::string trim_copy(const std::string &str) { /// remove quotes at the front and back of a string either '"' or '\'' CLI11_INLINE std::string &remove_quotes(std::string &str); +/// remove quotes from all elements of a string vector and process escaped components +CLI11_INLINE void remove_quotes(std::vector &args); + /// Add a leader to the beginning of all new lines (nothing is added /// at the start of the first line). `"; "` would be for ini files /// @@ -654,14 +562,16 @@ CLI11_INLINE std::ostream &format_aliases(std::ostream &out, const std::vector bool valid_first_char(T c) { return ((c != '-') && (c != '!') && (c != ' ') && c != '\n'); } +template bool valid_first_char(T c) { + return ((c != '-') && (static_cast(c) > 33)); // space and '!' not allowed +} /// Verify following characters of an option template bool valid_later_char(T c) { // = and : are value separators, { has special meaning for option defaults, - // and \n would just be annoying to deal with in many places allowing space here has too much potential for - // inadvertent entry errors and bugs - return ((c != '=') && (c != ':') && (c != '{') && (c != ' ') && c != '\n'); + // and control codes other than tab would just be annoying to deal with in many places allowing space here has too + // much potential for inadvertent entry errors and bugs + return ((c != '=') && (c != ':') && (c != '{') && ((static_cast(c) > 32) || c == '\t')); } /// Verify an option/subcommand name @@ -724,21 +634,47 @@ template inline std::string find_and_modify(std::string str, return str; } +/// close a sequence of characters indicated by a closure character. Brackets allows sub sequences +/// recognized bracket sequences include "'`[(<{ other closure characters are assumed to be literal strings +CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char); + /// Split a string '"one two" "three"' into 'one two', 'three' -/// Quote characters can be ` ' or " +/// Quote characters can be ` ' or " or bracket characters [{(< with matching to the matching bracket CLI11_INLINE std::vector split_up(std::string str, char delimiter = '\0'); +/// get the value of an environmental variable or empty string if empty +CLI11_INLINE std::string get_environment_value(const std::string &env_name); + /// This function detects an equal or colon followed by an escaped quote after an argument /// then modifies the string to replace the equality with a space. This is needed /// to allow the split up function to work properly and is intended to be used with the find_and_modify function /// the return value is the offset+1 which is required by the find_and_modify function. CLI11_INLINE std::size_t escape_detect(std::string &str, std::size_t offset); -/// Add quotes if the string contains spaces -CLI11_INLINE std::string &add_quotes_if_needed(std::string &str); +/// @brief detect if a string has escapable characters +/// @param str the string to do the detection on +/// @return true if the string has escapable characters +CLI11_INLINE bool has_escapable_character(const std::string &str); + +/// @brief escape all escapable characters +/// @param str the string to escape +/// @return a string with the escapble characters escaped with '\' +CLI11_INLINE std::string add_escaped_characters(const std::string &str); + +/// @brief replace the escaped characters with their equivalent +CLI11_INLINE std::string remove_escaped_characters(const std::string &str); + +/// generate a string with all non printable characters escaped to hex codes +CLI11_INLINE std::string binary_escape_string(const std::string &string_to_escape); + +CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string); + +/// extract an escaped binary_string +CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string); + +/// process a quoted string, remove the quotes and if appropriate handle escaped characters +CLI11_INLINE bool process_quoted_string(std::string &str, char string_char = '\"', char literal_char = '\''); -/// get the value of an environmental variable or empty string if empty -CLI11_INLINE std::string get_environment_value(const std::string &env_name); } // namespace detail @@ -787,7 +723,17 @@ CLI11_INLINE std::string &rtrim(std::string &str, const std::string &filter) { } CLI11_INLINE std::string &remove_quotes(std::string &str) { - if(str.length() > 1 && (str.front() == '"' || str.front() == '\'')) { + if(str.length() > 1 && (str.front() == '"' || str.front() == '\'' || str.front() == '`')) { + if(str.front() == str.back()) { + str.pop_back(); + str.erase(str.begin(), str.begin() + 1); + } + } + return str; +} + +CLI11_INLINE std::string &remove_outer(std::string &str, char key) { + if(str.length() > 1 && (str.front() == key)) { if(str.front() == str.back()) { str.pop_back(); str.erase(str.begin(), str.begin() + 1); @@ -907,37 +853,220 @@ find_member(std::string name, const std::vector names, bool ignore_ return (it != std::end(names)) ? (it - std::begin(names)) : (-1); } +static const std::string escapedChars("\b\t\n\f\r\"\\"); +static const std::string escapedCharsCode("btnfr\"\\"); +static const std::string bracketChars{"\"'`[(<{"}; +static const std::string matchBracketChars("\"'`])>}"); + +CLI11_INLINE bool has_escapable_character(const std::string &str) { + return (str.find_first_of(escapedChars) != std::string::npos); +} + +CLI11_INLINE std::string add_escaped_characters(const std::string &str) { + std::string out; + out.reserve(str.size() + 4); + for(char s : str) { + auto sloc = escapedChars.find_first_of(s); + if(sloc != std::string::npos) { + out.push_back('\\'); + out.push_back(escapedCharsCode[sloc]); + } else { + out.push_back(s); + } + } + return out; +} + +CLI11_INLINE std::uint32_t hexConvert(char hc) { + int hcode{0}; + if(hc >= '0' && hc <= '9') { + hcode = (hc - '0'); + } else if(hc >= 'A' && hc <= 'F') { + hcode = (hc - 'A' + 10); + } else if(hc >= 'a' && hc <= 'f') { + hcode = (hc - 'a' + 10); + } else { + hcode = -1; + } + return static_cast(hcode); +} + +CLI11_INLINE char make_char(std::uint32_t code) { return static_cast(static_cast(code)); } + +CLI11_INLINE void append_codepoint(std::string &str, std::uint32_t code) { + if(code < 0x80) { // ascii code equivalent + str.push_back(static_cast(code)); + } else if(code < 0x800) { // \u0080 to \u07FF + // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111 + str.push_back(make_char(0xC0 | code >> 6)); + str.push_back(make_char(0x80 | (code & 0x3F))); + } else if(code < 0x10000) { // U+0800...U+FFFF + if(0xD800 <= code && code <= 0xDFFF) { + throw std::invalid_argument("[0xD800, 0xDFFF] are not valid UTF-8."); + } + // 1110yyyy 10yxxxxx 10xxxxxx + str.push_back(make_char(0xE0 | code >> 12)); + str.push_back(make_char(0x80 | (code >> 6 & 0x3F))); + str.push_back(make_char(0x80 | (code & 0x3F))); + } else if(code < 0x110000) { // U+010000 ... U+10FFFF + // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx + str.push_back(make_char(0xF0 | code >> 18)); + str.push_back(make_char(0x80 | (code >> 12 & 0x3F))); + str.push_back(make_char(0x80 | (code >> 6 & 0x3F))); + str.push_back(make_char(0x80 | (code & 0x3F))); + } +} + +CLI11_INLINE std::string remove_escaped_characters(const std::string &str) { + + std::string out; + out.reserve(str.size()); + for(auto loc = str.begin(); loc < str.end(); ++loc) { + if(*loc == '\\') { + if(str.end() - loc < 2) { + throw std::invalid_argument("invalid escape sequence " + str); + } + auto ecloc = escapedCharsCode.find_first_of(*(loc + 1)); + if(ecloc != std::string::npos) { + out.push_back(escapedChars[ecloc]); + ++loc; + } else if(*(loc + 1) == 'u') { + // must have 4 hex characters + if(str.end() - loc < 6) { + throw std::invalid_argument("unicode sequence must have 4 hex codes " + str); + } + std::uint32_t code{0}; + std::uint32_t mplier{16 * 16 * 16}; + for(int ii = 2; ii < 6; ++ii) { + std::uint32_t res = hexConvert(*(loc + ii)); + if(res > 0x0F) { + throw std::invalid_argument("unicode sequence must have 4 hex codes " + str); + } + code += res * mplier; + mplier = mplier / 16; + } + append_codepoint(out, code); + loc += 5; + } else if(*(loc + 1) == 'U') { + // must have 8 hex characters + if(str.end() - loc < 10) { + throw std::invalid_argument("unicode sequence must have 8 hex codes " + str); + } + std::uint32_t code{0}; + std::uint32_t mplier{16 * 16 * 16 * 16 * 16 * 16 * 16}; + for(int ii = 2; ii < 10; ++ii) { + std::uint32_t res = hexConvert(*(loc + ii)); + if(res > 0x0F) { + throw std::invalid_argument("unicode sequence must have 8 hex codes " + str); + } + code += res * mplier; + mplier = mplier / 16; + } + append_codepoint(out, code); + loc += 9; + } else if(*(loc + 1) == '0') { + out.push_back('\0'); + ++loc; + } else { + throw std::invalid_argument(std::string("unrecognized escape sequence \\") + *(loc + 1) + " in " + str); + } + } else { + out.push_back(*loc); + } + } + return out; +} + +CLI11_INLINE std::size_t close_string_quote(const std::string &str, std::size_t start, char closure_char) { + std::size_t loc{0}; + for(loc = start + 1; loc < str.size(); ++loc) { + if(str[loc] == closure_char) { + break; + } + if(str[loc] == '\\') { + // skip the next character for escaped sequences + ++loc; + } + } + return loc; +} + +CLI11_INLINE std::size_t close_literal_quote(const std::string &str, std::size_t start, char closure_char) { + auto loc = str.find_first_of(closure_char, start + 1); + return (loc != std::string::npos ? loc : str.size()); +} + +CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char) { + + auto bracket_loc = matchBracketChars.find(closure_char); + switch(bracket_loc) { + case 0: + return close_string_quote(str, start, closure_char); + case 1: + case 2: + case std::string::npos: + return close_literal_quote(str, start, closure_char); + default: + break; + } + + std::string closures(1, closure_char); + auto loc = start + 1; + + while(loc < str.size()) { + if(str[loc] == closures.back()) { + closures.pop_back(); + if(closures.empty()) { + return loc; + } + } + bracket_loc = bracketChars.find(str[loc]); + if(bracket_loc != std::string::npos) { + switch(bracket_loc) { + case 0: + loc = close_string_quote(str, loc, str[loc]); + break; + case 1: + case 2: + loc = close_literal_quote(str, loc, str[loc]); + break; + default: + closures.push_back(matchBracketChars[bracket_loc]); + break; + } + } + ++loc; + } + if(loc > str.size()) { + loc = str.size(); + } + return loc; +} + CLI11_INLINE std::vector split_up(std::string str, char delimiter) { - const std::string delims("\'\"`"); auto find_ws = [delimiter](char ch) { return (delimiter == '\0') ? std::isspace(ch, std::locale()) : (ch == delimiter); }; trim(str); std::vector output; - bool embeddedQuote = false; - char keyChar = ' '; while(!str.empty()) { - if(delims.find_first_of(str[0]) != std::string::npos) { - keyChar = str[0]; - auto end = str.find_first_of(keyChar, 1); - while((end != std::string::npos) && (str[end - 1] == '\\')) { // deal with escaped quotes - end = str.find_first_of(keyChar, end + 1); - embeddedQuote = true; - } - if(end != std::string::npos) { - output.push_back(str.substr(1, end - 1)); + if(bracketChars.find_first_of(str[0]) != std::string::npos) { + auto bracketLoc = bracketChars.find_first_of(str[0]); + auto end = close_sequence(str, 0, matchBracketChars[bracketLoc]); + if(end >= str.size()) { + output.push_back(std::move(str)); + str.clear(); + } else { + output.push_back(str.substr(0, end + 1)); if(end + 2 < str.size()) { str = str.substr(end + 2); } else { str.clear(); } - - } else { - output.push_back(str.substr(1)); - str = ""; } + } else { auto it = std::find_if(std::begin(str), std::end(str), find_ws); if(it != std::end(str)) { @@ -946,14 +1075,9 @@ CLI11_INLINE std::vector split_up(std::string str, char delimiter) str = std::string(it + 1, str.end()); } else { output.push_back(str); - str = ""; + str.clear(); } } - // transform any embedded quotes into the regular character - if(embeddedQuote) { - output.back() = find_and_replace(output.back(), std::string("\\") + keyChar, std::string(1, keyChar)); - embeddedQuote = false; - } trim(str); } return output; @@ -971,15 +1095,119 @@ CLI11_INLINE std::size_t escape_detect(std::string &str, std::size_t offset) { return offset + 1; } -CLI11_INLINE std::string &add_quotes_if_needed(std::string &str) { - if((str.front() != '"' && str.front() != '\'') || str.front() != str.back()) { - char quote = str.find('"') < str.find('\'') ? '\'' : '"'; - if(str.find(' ') != std::string::npos) { - str.insert(0, 1, quote); - str.append(1, quote); +CLI11_INLINE std::string binary_escape_string(const std::string &string_to_escape) { + // s is our escaped output string + std::string escaped_string{}; + // loop through all characters + for(char c : string_to_escape) { + // check if a given character is printable + // the cast is necessary to avoid undefined behaviour + if(isprint(static_cast(c)) == 0) { + std::stringstream stream; + // if the character is not printable + // we'll convert it to a hex string using a stringstream + // note that since char is signed we have to cast it to unsigned first + stream << std::hex << static_cast(static_cast(c)); + std::string code = stream.str(); + escaped_string += std::string("\\x") + (code.size() < 2 ? "0" : "") + code; + + } else { + escaped_string.push_back(c); } } - return str; + if(escaped_string != string_to_escape) { + auto sqLoc = escaped_string.find('\''); + while(sqLoc != std::string::npos) { + escaped_string.replace(sqLoc, sqLoc + 1, "\\x27"); + sqLoc = escaped_string.find('\''); + } + escaped_string.insert(0, "'B\"("); + escaped_string.push_back(')'); + escaped_string.push_back('"'); + escaped_string.push_back('\''); + } + return escaped_string; +} + +CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string) { + size_t ssize = escaped_string.size(); + if(escaped_string.compare(0, 3, "B\"(") == 0 && escaped_string.compare(ssize - 2, 2, ")\"") == 0) { + return true; + } + return (escaped_string.compare(0, 4, "'B\"(") == 0 && escaped_string.compare(ssize - 3, 3, ")\"'") == 0); +} + +CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string) { + std::size_t start{0}; + std::size_t tail{0}; + size_t ssize = escaped_string.size(); + if(escaped_string.compare(0, 3, "B\"(") == 0 && escaped_string.compare(ssize - 2, 2, ")\"") == 0) { + start = 3; + tail = 2; + } else if(escaped_string.compare(0, 4, "'B\"(") == 0 && escaped_string.compare(ssize - 3, 3, ")\"'") == 0) { + start = 4; + tail = 3; + } + + if(start == 0) { + return escaped_string; + } + std::string outstring; + + outstring.reserve(ssize - start - tail); + std::size_t loc = start; + while(loc < ssize - tail) { + // ssize-2 to skip )" at the end + if(escaped_string[loc] == '\\' && (escaped_string[loc + 1] == 'x' || escaped_string[loc + 1] == 'X')) { + auto c1 = escaped_string[loc + 2]; + auto c2 = escaped_string[loc + 3]; + + std::uint32_t res1 = hexConvert(c1); + std::uint32_t res2 = hexConvert(c2); + if(res1 <= 0x0F && res2 <= 0x0F) { + loc += 4; + outstring.push_back(static_cast(res1 * 16 + res2)); + continue; + } + } + outstring.push_back(escaped_string[loc]); + ++loc; + } + return outstring; +} + +CLI11_INLINE void remove_quotes(std::vector &args) { + for(auto &arg : args) { + if(arg.front() == '\"' && arg.back() == '\"') { + remove_quotes(arg); + // only remove escaped for string arguments not literal strings + arg = remove_escaped_characters(arg); + } else { + remove_quotes(arg); + } + } +} + +CLI11_INLINE bool process_quoted_string(std::string &str, char string_char, char literal_char) { + if(str.size() <= 1) { + return false; + } + if(detail::is_binary_escaped_string(str)) { + str = detail::extract_binary_string(str); + return true; + } + if(str.front() == string_char && str.back() == string_char) { + detail::remove_outer(str, string_char); + if(str.find_first_of('\\') != std::string::npos) { + str = detail::remove_escaped_characters(str); + } + return true; + } + if((str.front() == literal_char || str.front() == '`') && str.back() == str.front()) { + detail::remove_outer(str, str.front()); + return true; + } + return false; } std::string get_environment_value(const std::string &env_name) { @@ -1113,6 +1341,9 @@ class BadNameString : public ConstructionError { return BadNameString("Long names strings require 2 dashes " + name); } static BadNameString BadLongName(std::string name) { return BadNameString("Bad long name: " + name); } + static BadNameString BadPositionalName(std::string name) { + return BadNameString("Invalid positional Name: " + name); + } static BadNameString DashesOnly(std::string name) { return BadNameString("Must have a name, not just dashes: " + name); } @@ -1220,22 +1451,22 @@ class RequiredError : public ParseError { if((min_option == 1) && (max_option == 1) && (used == 0)) return RequiredError("Exactly 1 option from [" + option_list + "]"); if((min_option == 1) && (max_option == 1) && (used > 1)) { - return {"Exactly 1 option from [" + option_list + "] is required and " + std::to_string(used) + + return {"Exactly 1 option from [" + option_list + "] is required but " + std::to_string(used) + " were given", ExitCodes::RequiredError}; } if((min_option == 1) && (used == 0)) return RequiredError("At least 1 option from [" + option_list + "]"); if(used < min_option) { - return {"Requires at least " + std::to_string(min_option) + " options used and only " + - std::to_string(used) + "were given from [" + option_list + "]", + return {"Requires at least " + std::to_string(min_option) + " options used but only " + + std::to_string(used) + " were given from [" + option_list + "]", ExitCodes::RequiredError}; } if(max_option == 1) return {"Requires at most 1 options be given from [" + option_list + "]", ExitCodes::RequiredError}; - return {"Requires at most " + std::to_string(max_option) + " options be used and " + std::to_string(used) + - "were given from [" + option_list + "]", + return {"Requires at most " + std::to_string(max_option) + " options be used but " + std::to_string(used) + + " were given from [" + option_list + "]", ExitCodes::RequiredError}; } }; @@ -1402,6 +1633,23 @@ template <> struct IsMemberType { using type = std::string; }; +namespace adl_detail { +/// Check for existence of user-supplied lexical_cast. +/// +/// This struct has to be in a separate namespace so that it doesn't see our lexical_cast overloads in CLI::detail. +/// Standard says it shouldn't see them if it's defined before the corresponding lexical_cast declarations, but this +/// requires a working implementation of two-phase lookup, and not all compilers can boast that (msvc, ahem). +template class is_lexical_castable { + template + static auto test(int) -> decltype(lexical_cast(std::declval(), std::declval()), std::true_type()); + + template static auto test(...) -> std::false_type; + + public: + static constexpr bool value = decltype(test(0))::value; +}; +} // namespace adl_detail + namespace detail { // These are utilities for IsMember and other transforming objects @@ -1483,7 +1731,7 @@ template class is_direct_constructible { #pragma diag_suppress 2361 #endif #endif - TT{std::declval()} + TT{std::declval()} #ifdef __CUDACC__ #ifdef __NVCC_DIAG_PRAGMA_SUPPORT__ #pragma nv_diag_default 2361 @@ -1491,8 +1739,8 @@ template class is_direct_constructible { #pragma diag_default 2361 #endif #endif - , - std::is_move_assignable()); + , + std::is_move_assignable()); template static auto test(int, std::false_type) -> std::false_type; @@ -2179,7 +2427,7 @@ bool integral_conversion(const std::string &input, T &output) noexcept { if(input.empty() || input.front() == '-') { return false; } - char *val = nullptr; + char *val{nullptr}; errno = 0; std::uint64_t output_ll = std::strtoull(input.c_str(), &val, 0); if(errno == ERANGE) { @@ -2195,6 +2443,33 @@ bool integral_conversion(const std::string &input, T &output) noexcept { output = (output_sll < 0) ? static_cast(0) : static_cast(output_sll); return (static_cast(output) == output_sll); } + // remove separators + if(input.find_first_of("_'") != std::string::npos) { + std::string nstring = input; + nstring.erase(std::remove(nstring.begin(), nstring.end(), '_'), nstring.end()); + nstring.erase(std::remove(nstring.begin(), nstring.end(), '\''), nstring.end()); + return integral_conversion(nstring, output); + } + if(input.compare(0, 2, "0o") == 0) { + val = nullptr; + errno = 0; + output_ll = std::strtoull(input.c_str() + 2, &val, 8); + if(errno == ERANGE) { + return false; + } + output = static_cast(output_ll); + return (val == (input.c_str() + input.size()) && static_cast(output) == output_ll); + } + if(input.compare(0, 2, "0b") == 0) { + val = nullptr; + errno = 0; + output_ll = std::strtoull(input.c_str() + 2, &val, 2); + if(errno == ERANGE) { + return false; + } + output = static_cast(output_ll); + return (val == (input.c_str() + input.size()) && static_cast(output) == output_ll); + } return false; } @@ -2219,11 +2494,38 @@ bool integral_conversion(const std::string &input, T &output) noexcept { output = static_cast(1); return true; } + // remove separators + if(input.find_first_of("_'") != std::string::npos) { + std::string nstring = input; + nstring.erase(std::remove(nstring.begin(), nstring.end(), '_'), nstring.end()); + nstring.erase(std::remove(nstring.begin(), nstring.end(), '\''), nstring.end()); + return integral_conversion(nstring, output); + } + if(input.compare(0, 2, "0o") == 0) { + val = nullptr; + errno = 0; + output_ll = std::strtoll(input.c_str() + 2, &val, 8); + if(errno == ERANGE) { + return false; + } + output = static_cast(output_ll); + return (val == (input.c_str() + input.size()) && static_cast(output) == output_ll); + } + if(input.compare(0, 2, "0b") == 0) { + val = nullptr; + errno = 0; + output_ll = std::strtoll(input.c_str() + 2, &val, 2); + if(errno == ERANGE) { + return false; + } + output = static_cast(output_ll); + return (val == (input.c_str() + input.size()) && static_cast(output) == output_ll); + } return false; } -/// Convert a flag into an integer value typically binary flags -inline std::int64_t to_flag_value(std::string val) { +/// Convert a flag into an integer value typically binary flags sets errno to nonzero if conversion failed +inline std::int64_t to_flag_value(std::string val) noexcept { static const std::string trueString("true"); static const std::string falseString("false"); if(val == trueString) { @@ -2251,7 +2553,8 @@ inline std::int64_t to_flag_value(std::string val) { ret = 1; break; default: - throw std::invalid_argument("unrecognized character"); + errno = EINVAL; + return -1; } return ret; } @@ -2260,7 +2563,11 @@ inline std::int64_t to_flag_value(std::string val) { } else if(val == falseString || val == "off" || val == "no" || val == "disable") { ret = -1; } else { - ret = std::stoll(val); + char *loc_ptr{nullptr}; + ret = std::strtoll(val.c_str(), &loc_ptr, 0); + if(loc_ptr != (val.c_str() + val.size()) && errno == 0) { + errno = EINVAL; + } } return ret; } @@ -2289,18 +2596,16 @@ bool lexical_cast(const std::string &input, T &output) { template ::value == object_category::boolean_value, detail::enabler> = detail::dummy> bool lexical_cast(const std::string &input, T &output) { - try { - auto out = to_flag_value(input); + errno = 0; + auto out = to_flag_value(input); + if(errno == 0) { output = (out > 0); - return true; - } catch(const std::invalid_argument &) { - return false; - } catch(const std::out_of_range &) { - // if the number is out of the range of a 64 bit value then it is still a number and for this purpose is still - // valid all we care about the sign + } else if(errno == ERANGE) { output = (input[0] != '-'); - return true; + } else { + return false; } + return true; } /// Floats @@ -2313,7 +2618,17 @@ bool lexical_cast(const std::string &input, T &output) { char *val = nullptr; auto output_ld = std::strtold(input.c_str(), &val); output = static_cast(output_ld); - return val == (input.c_str() + input.size()); + if(val == (input.c_str() + input.size())) { + return true; + } + // remove separators + if(input.find_first_of("_'") != std::string::npos) { + std::string nstring = input; + nstring.erase(std::remove(nstring.begin(), nstring.end(), '_'), nstring.end()); + nstring.erase(std::remove(nstring.begin(), nstring.end(), '\''), nstring.end()); + return lexical_cast(nstring, output); + } + return false; } /// complex @@ -2495,13 +2810,24 @@ bool lexical_cast(const std::string &input, T &output) { /// Non-string parsable by a stream template ::value == object_category::other && !std::is_assignable::value, + enable_if_t::value == object_category::other && !std::is_assignable::value && + is_istreamable::value, detail::enabler> = detail::dummy> bool lexical_cast(const std::string &input, T &output) { - static_assert(is_istreamable::value, + return from_stream(input, output); +} + +/// Fallback overload that prints a human-readable error for types that we don't recognize and that don't have a +/// user-supplied lexical_cast overload. +template ::value == object_category::other && !std::is_assignable::value && + !is_istreamable::value && !adl_detail::is_lexical_castable::value, + detail::enabler> = detail::dummy> +bool lexical_cast(const std::string & /*input*/, T & /*output*/) { + static_assert(!std::is_same::value, // Can't just write false here. "option object type must have a lexical cast overload or streaming input operator(>>) defined, if it " "is convertible from another type use the add_option(...) with XC being the known type"); - return from_stream(input, output); + return false; } /// Assign a value through lexical cast operations @@ -2634,9 +2960,7 @@ bool lexical_conversion(const std::vector &strings, AssignTo &outp FirstType v1; SecondType v2; bool retval = lexical_assign(strings[0], v1); - if(strings.size() > 1) { - retval = retval && lexical_assign(strings[1], v2); - } + retval = retval && lexical_assign((strings.size() > 1) ? strings[1] : std::string{}, v2); if(retval) { output = AssignTo{v1, v2}; } @@ -2956,12 +3280,13 @@ inline std::string sum_string_vector(const std::vector &values) { double tv{0.0}; auto comp = lexical_cast(arg, tv); if(!comp) { - try { - tv = static_cast(detail::to_flag_value(arg)); - } catch(const std::exception &) { - fail = true; + errno = 0; + auto fv = detail::to_flag_value(arg); + fail = (errno != 0); + if(fail) { break; } + tv = static_cast(fv); } val += tv; } @@ -3091,7 +3416,6 @@ get_names(const std::vector &input) { std::vector short_names; std::vector long_names; std::string pos_name; - for(std::string name : input) { if(name.length() == 0) { continue; @@ -3112,12 +3436,15 @@ get_names(const std::vector &input) { } else if(name == "-" || name == "--") { throw BadNameString::DashesOnly(name); } else { - if(pos_name.length() > 0) + if(!pos_name.empty()) throw BadNameString::MultiPositionalNames(name); - pos_name = name; + if(valid_name_string(name)) { + pos_name = name; + } else { + throw BadNameString::BadPositionalName(name); + } } } - return std::make_tuple(short_names, long_names, pos_name); } @@ -3134,7 +3461,6 @@ struct ConfigItem { /// This is the name std::string name{}; - /// Listing of inputs std::vector inputs{}; @@ -3197,8 +3523,8 @@ class ConfigBase : public Config { char valueDelimiter = '='; /// the character to use around strings char stringQuote = '"'; - /// the character to use around single characters - char characterQuote = '\''; + /// the character to use around single characters and literal strings + char literalQuote = '\''; /// the maximum number of layers to allow uint8_t maximumLayers{255}; /// the separator used to separator parent layers @@ -3234,10 +3560,10 @@ class ConfigBase : public Config { valueDelimiter = vSep; return this; } - /// Specify the quote characters used around strings and characters - ConfigBase *quoteCharacter(char qString, char qChar) { + /// Specify the quote characters used around strings and literal strings + ConfigBase *quoteCharacter(char qString, char literalChar) { stringQuote = qString; - characterQuote = qChar; + literalQuote = literalChar; return this; } /// Specify the maximum number of parents @@ -3469,6 +3795,11 @@ class IPV4Validator : public Validator { IPV4Validator(); }; +class EscapedStringTransformer : public Validator { + public: + EscapedStringTransformer(); +}; + } // namespace detail // Static is not needed here, because global const implies static. @@ -3488,6 +3819,9 @@ const detail::NonexistentPathValidator NonexistentPath; /// Check for an IP4 address const detail::IPV4Validator ValidIPV4; +/// convert escaped characters into their associated values +const detail::EscapedStringTransformer EscapedString; + /// Validate the input as a particular type template class TypeValidator : public Validator { public: @@ -4247,7 +4581,7 @@ CLI11_INLINE path_type check_path(const char *file) noexcept { switch(stat.type()) { case std::filesystem::file_type::none: // LCOV_EXCL_LINE case std::filesystem::file_type::not_found: - return path_type::nonexistent; + return path_type::nonexistent; // LCOV_EXCL_LINE case std::filesystem::file_type::directory: return path_type::directory; case std::filesystem::file_type::symlink: @@ -4341,10 +4675,29 @@ CLI11_INLINE IPV4Validator::IPV4Validator() : Validator("IPV4") { return std::string("Each IP number must be between 0 and 255 ") + var; } } - return std::string(); + return std::string{}; }; } +CLI11_INLINE EscapedStringTransformer::EscapedStringTransformer() { + func_ = [](std::string &str) { + try { + if(str.size() > 1 && (str.front() == '\"' || str.front() == '\'' || str.front() == '`') && + str.front() == str.back()) { + process_quoted_string(str); + } else if(str.find_first_of('\\') != std::string::npos) { + if(detail::is_binary_escaped_string(str)) { + str = detail::extract_binary_string(str); + } else { + str = remove_escaped_characters(str); + } + } + return std::string{}; + } catch(const std::invalid_argument &ia) { + return std::string(ia.what()); + } + }; +} } // namespace detail CLI11_INLINE FileOnDefaultPath::FileOnDefaultPath(std::string default_path, bool enableErrorReturn) @@ -4643,7 +4996,8 @@ enum class MultiOptionPolicy : char { TakeFirst, //!< take only the first Expected number of arguments Join, //!< merge all the arguments together into a single string via the delimiter character default('\n') TakeAll, //!< just get all the passed argument regardless - Sum //!< sum all the arguments together if numerical or concatenate directly without delimiter + Sum, //!< sum all the arguments together if numerical or concatenate directly without delimiter + Reverse, //!< take only the last Expected number of arguments in reverse order }; /// This is the CRTP base class for Option and OptionDefaults. It was designed this way @@ -5151,12 +5505,12 @@ class Option : public OptionBase