Skip to content

Commit

Permalink
[libc++] Define an internal locale API as a shim on top of the curren…
Browse files Browse the repository at this point in the history
…t one (#114596)

Our current locale base API is a mix of non-reserved system names that
we incorrectly (re)define and internal functions and macros starting
with __libcpp. This patch introduces a function-based internal interface
to isolate the rest of the code base from that mess. This makes it
possible to refactor how each platform implements the base localization
API in subsequent patches without impacting the rest of the code base.
  • Loading branch information
ldionne authored Nov 6, 2024
1 parent 9b016e3 commit 5d8be4c
Show file tree
Hide file tree
Showing 8 changed files with 423 additions and 227 deletions.
10 changes: 5 additions & 5 deletions libcxx/include/__locale
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ class _LIBCPP_TEMPLATE_VIS collate_byname;

template <>
class _LIBCPP_EXPORTED_FROM_ABI collate_byname<char> : public collate<char> {
locale_t __l_;
__locale::__locale_t __l_;

public:
typedef char char_type;
Expand All @@ -266,7 +266,7 @@ protected:
#if _LIBCPP_HAS_WIDE_CHARACTERS
template <>
class _LIBCPP_EXPORTED_FROM_ABI collate_byname<wchar_t> : public collate<wchar_t> {
locale_t __l_;
__locale::__locale_t __l_;

public:
typedef wchar_t char_type;
Expand Down Expand Up @@ -616,7 +616,7 @@ class _LIBCPP_TEMPLATE_VIS ctype_byname;

template <>
class _LIBCPP_EXPORTED_FROM_ABI ctype_byname<char> : public ctype<char> {
locale_t __l_;
__locale::__locale_t __l_;

public:
explicit ctype_byname(const char*, size_t = 0);
Expand All @@ -633,7 +633,7 @@ protected:
#if _LIBCPP_HAS_WIDE_CHARACTERS
template <>
class _LIBCPP_EXPORTED_FROM_ABI ctype_byname<wchar_t> : public ctype<wchar_t> {
locale_t __l_;
__locale::__locale_t __l_;

public:
explicit ctype_byname(const char*, size_t = 0);
Expand Down Expand Up @@ -824,7 +824,7 @@ protected:
#if _LIBCPP_HAS_WIDE_CHARACTERS
template <>
class _LIBCPP_EXPORTED_FROM_ABI codecvt<wchar_t, char, mbstate_t> : public locale::facet, public codecvt_base {
locale_t __l_;
__locale::__locale_t __l_;

public:
typedef wchar_t intern_type;
Expand Down
307 changes: 243 additions & 64 deletions libcxx/include/__locale_dir/locale_base_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,93 @@

#include <__config>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif

// The platform-specific headers have to provide the following interface.
//
// These functions are equivalent to their C counterparts, except that __locale::__locale_t
// is used instead of the current global locale.
//
// Variadic functions may be implemented as templates with a parameter pack instead
// of C-style variadic functions.
//
// TODO: I think __uselocale() is not necessary if we refactor a bit.
// TODO: __localeconv shouldn't take a reference, but the Windows implementation doesn't allow copying __locale_t
//
// Locale management
// -----------------
// namespace __locale {
// using __locale_t = implementation-defined;
// __locale_t __uselocale(__locale_t);
// __locale_t __newlocale(int, const char*, __locale_t);
// void __freelocale(__locale_t);
// lconv* __localeconv(__locale_t&);
// }
//
// Strtonum functions
// ------------------
// namespace __locale {
// float __strtof(const char*, char**, __locale_t);
// double __strtod(const char*, char**, __locale_t);
// long double __strtold(const char*, char**, __locale_t);
// long long __strtoll(const char*, char**, int, __locale_t);
// unsigned long long __strtoull(const char*, char**, int, __locale_t);
// }
//
// Character manipulation functions
// --------------------------------
// namespace __locale {
// int __islower(int, __locale_t);
// int __isupper(int, __locale_t);
// int __isdigit(int, __locale_t);
// int __isxdigit(int, __locale_t);
// int __toupper(int, __locale_t);
// int __tolower(int, __locale_t);
// int __strcoll(const char*, const char*, __locale_t);
// size_t __strxfrm(char*, const char*, size_t, __locale_t);
//
// int __iswspace(wint_t, __locale_t);
// int __iswprint(wint_t, __locale_t);
// int __iswcntrl(wint_t, __locale_t);
// int __iswupper(wint_t, __locale_t);
// int __iswlower(wint_t, __locale_t);
// int __iswalpha(wint_t, __locale_t);
// int __iswblank(wint_t, __locale_t);
// int __iswdigit(wint_t, __locale_t);
// int __iswpunct(wint_t, __locale_t);
// int __iswxdigit(wint_t, __locale_t);
// wint_t __towupper(wint_t, __locale_t);
// wint_t __towlower(wint_t, __locale_t);
// int __wcscoll(const wchar_t*, const wchar_t*, __locale_t);
// size_t __wcsxfrm(wchar_t*, const wchar_t*, size_t, __locale_t);
//
// size_t __strftime(char*, size_t, const char*, const tm*, __locale_t);
// }
//
// Other functions
// ---------------
// namespace __locale {
// implementation-defined __mb_len_max(__locale_t);
// wint_t __btowc(int, __locale_t);
// int __wctob(wint_t, __locale_t);
// size_t __wcsnrtombs(char*, const wchar_t**, size_t, size_t, mbstate_t*, __locale_t);
// size_t __wcrtomb(char*, wchar_t, mbstate_t*, __locale_t);
// size_t __mbsnrtowcs(wchar_t*, const char**, size_t, size_t, mbstate_t*, __locale_t);
// size_t __mbrtowc(wchar_t*, const char*, size_t, mbstate_t*, __locale_t);
// int __mbtowc(wchar_t*, const char*, size_t, __locale_t);
// size_t __mbrlen(const char*, size_t, mbstate_t*, __locale_t);
// size_t __mbsrtowcs(wchar_t*, const char**, size_t, mbstate_t*, __locale_t);
// int __snprintf(char*, size_t, __locale_t, const char*, ...);
// int __asprintf(char**, __locale_t, const char*, ...);
// int __sscanf(const char*, __locale_t, const char*, ...);
// }

// TODO: This is a temporary definition to bridge between the old way we defined the locale base API
// (by providing global non-reserved names) and the new API. As we move individual platforms
// towards the new way of defining the locale base API, this should disappear since each platform
// will define those directly.
#if defined(_LIBCPP_MSVCRT_LIKE)
# include <__locale_dir/locale_base_api/win32.h>
#elif defined(_AIX) || defined(__MVS__)
Expand All @@ -35,71 +122,163 @@
# include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h>
#endif

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#include <__cstddef/size_t.h>
#include <__utility/forward.h>
#include <ctype.h>
#include <string.h>
#include <time.h>
#if _LIBCPP_HAS_WIDE_CHARACTERS
# include <wctype.h>
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
namespace __locale {
//
// Locale management
//
// Locale handle type used by every function in this namespace.
using __locale_t = locale_t;

#ifndef _LIBCPP_MSVCRT_LIKE
// Forwards to uselocale(). Only declared when _LIBCPP_MSVCRT_LIKE is not defined.
inline _LIBCPP_HIDE_FROM_ABI __locale_t __uselocale(__locale_t __l) {
  return uselocale(__l);
}
#endif

// Forwards to newlocale() with the arguments unchanged and in the same order.
inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __mask, const char* __nm, __locale_t __base) {
  return newlocale(__mask, __nm, __base);
}

// Forwards to freelocale().
inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __l) {
  freelocale(__l);
}

// Forwards to __libcpp_localeconv_l(). The locale is taken by reference, matching the
// signature of the underlying function.
inline _LIBCPP_HIDE_FROM_ABI lconv* __localeconv(__locale_t& __l) {
  return __libcpp_localeconv_l(__l);
}

//
// Strtonum functions
//
// Each wrapper below passes its arguments, unchanged and in order, to the
// corresponding *_l string-to-number routine and returns that routine's result.

// Forwards to strtof_l().
inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __str, char** __str_end, __locale_t __l) {
  return strtof_l(__str, __str_end, __l);
}

// Forwards to strtod_l().
inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __str, char** __str_end, __locale_t __l) {
  return strtod_l(__str, __str_end, __l);
}

// Forwards to strtold_l().
inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __str, char** __str_end, __locale_t __l) {
  return strtold_l(__str, __str_end, __l);
}

// Forwards to strtoll_l(); __radix is the numeric base handed to it.
inline _LIBCPP_HIDE_FROM_ABI long long
__strtoll(const char* __str, char** __str_end, int __radix, __locale_t __l) {
  return strtoll_l(__str, __str_end, __radix, __l);
}

// Forwards to strtoull_l(); __radix is the numeric base handed to it.
inline _LIBCPP_HIDE_FROM_ABI unsigned long long
__strtoull(const char* __str, char** __str_end, int __radix, __locale_t __l) {
  return strtoull_l(__str, __str_end, __radix, __l);
}

//
// Character manipulation functions
//
// Narrow-character classification: each function forwards to the *_l function of the same name.
inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __l) {
  return islower_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __l) {
  return isupper_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __l) {
  return isdigit_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __l) {
  return isxdigit_l(__c, __l);
}

// Narrow-string collation and transformation: forward to strcoll_l() / strxfrm_l().
inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __lhs, const char* __rhs, __locale_t __l) {
  return strcoll_l(__lhs, __rhs, __l);
}
inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __len, __locale_t __l) {
  return strxfrm_l(__dest, __src, __len, __l);
}

// Narrow-character case conversion: forward to toupper_l() / tolower_l().
inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __l) {
  return toupper_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __l) {
  return tolower_l(__c, __l);
}

#if _LIBCPP_HAS_WIDE_CHARACTERS
// Wide-string collation and transformation: forward to wcscoll_l() / wcsxfrm_l().
inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __lhs, const wchar_t* __rhs, __locale_t __l) {
  return wcscoll_l(__lhs, __rhs, __l);
}
inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __len, __locale_t __l) {
  return wcsxfrm_l(__dest, __src, __len, __l);
}

// Wide-character classification: each function forwards to the isw*_l function of the same name.
inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __c, __locale_t __l) {
  return iswspace_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __c, __locale_t __l) {
  return iswprint_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __c, __locale_t __l) {
  return iswcntrl_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __c, __locale_t __l) {
  return iswupper_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __c, __locale_t __l) {
  return iswlower_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __c, __locale_t __l) {
  return iswalpha_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __c, __locale_t __l) {
  return iswblank_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __c, __locale_t __l) {
  return iswdigit_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __c, __locale_t __l) {
  return iswpunct_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __c, __locale_t __l) {
  return iswxdigit_l(__c, __l);
}

// Wide-character case conversion: forward to towupper_l() / towlower_l().
inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __c, __locale_t __l) {
  return towupper_l(__c, __l);
}
inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __c, __locale_t __l) {
  return towlower_l(__c, __l);
}
#endif

/*
The platform-specific headers have to provide the following interface:
// TODO: rename this to __libcpp_locale_t
using locale_t = implementation-defined;
implementation-defined __libcpp_mb_cur_max_l(locale_t);
wint_t __libcpp_btowc_l(int, locale_t);
int __libcpp_wctob_l(wint_t, locale_t);
size_t __libcpp_wcsnrtombs_l(char* dest, const wchar_t** src, size_t wide_char_count, size_t len, mbstate_t*, locale_t);
size_t __libcpp_wcrtomb_l(char* str, wchar_t wide_char, mbstate_t*, locale_t);
size_t __libcpp_mbsnrtowcs_l(wchar_t* dest, const char** src, size_t max_out, size_t len, mbstate_t*, locale_t);
size_t __libcpp_mbrtowc_l(wchar_t* dest, const char* src, size_t count, mbstate_t*, locale_t);
int __libcpp_mbtowc_l(wchar_t* dest, const char* src, size_t count, locale_t);
size_t __libcpp_mbrlen_l(const char* str, size_t count, mbstate_t*, locale_t);
// TODO: __libcpp_localeconv_l shouldn't take a reference, but the Windows implementation doesn't allow copying locale_t
lconv* __libcpp_localeconv_l(locale_t&);
size_t __libcpp_mbsrtowcs_l(wchar_t* dest, const char** src, size_t len, mbstate_t*, locale_t);
int __libcpp_snprintf_l(char* dest, size_t buff_size, locale_t, const char* format, ...);
int __libcpp_asprintf_l(char** dest, locale_t, const char* format, ...);
int __libcpp_sscanf_l(const char* dest, locale_t, const char* format, ...);
// TODO: change these to reserved names
float strtof_l(const char* str, char** str_end, locale_t);
double strtod_l(const char* str, char** str_end, locale_t);
long double strtold_l(const char* str, char** str_end, locale_t);
long long strtoll_l(const char* str, char** str_end, int base, locale_t);
unsigned long long strtoull_l(const char* str, char** str_end, int base, locale_t);
locale_t newlocale(int category_mask, const char* locale, locale_t base);
void freelocale(locale_t);
int islower_l(int ch, locale_t);
int isupper_l(int ch, locale_t);
int isdigit_l(int ch, locale_t);
int isxdigit_l(int ch, locale_t);
int strcoll_l(const char* lhs, const char* rhs, locale_t);
size_t strxfrm_l(char* dst, const char* src, size_t n, locale_t);
int wcscoll_l(const wchar_t* lhs, const wchar_t* rhs, locale_t);
size_t wcsxfrm_l(wchar_t* dst, const wchar_t* src, size_t n, locale_t);
int toupper_l(int ch, locale_t);
int tolower_l(int ch, locale_t);
int iswspace_l(wint_t ch, locale_t);
int iswprint_l(wint_t ch, locale_t);
int iswcntrl_l(wint_t ch, locale_t);
int iswupper_l(wint_t ch, locale_t);
int iswlower_l(wint_t ch, locale_t);
int iswalpha_l(wint_t ch, locale_t);
int iswblank_l(wint_t ch, locale_t);
int iswdigit_l(wint_t ch, locale_t);
int iswpunct_l(wint_t ch, locale_t);
int iswxdigit_l(wint_t ch, locale_t);
wint_t towupper_l(wint_t ch, locale_t);
wint_t towlower_l(wint_t ch, locale_t);
size_t strftime_l(char* str, size_t len, const char* format, const tm*, locale_t);
These functions are equivalent to their C counterparts,
except that locale_t is used instead of the current global locale.
The variadic functions may be implemented as templates with a parameter pack instead of variadic functions.
*/
// Forwards to strftime_l() with the arguments unchanged and in the same order,
// returning whatever strftime_l() returns.
inline _LIBCPP_HIDE_FROM_ABI size_t
__strftime(char* __buf, size_t __bufsize, const char* __fmt, const tm* __timeptr, __locale_t __l) {
  return strftime_l(__buf, __bufsize, __fmt, __timeptr, __l);
}

//
// Other functions
//
// Forwards to __libcpp_mb_cur_max_l(). The return type is deduced from that function
// so this wrapper returns exactly the type the underlying function produces.
inline _LIBCPP_HIDE_FROM_ABI decltype(__libcpp_mb_cur_max_l(__locale_t())) __mb_len_max(__locale_t __l) {
  return __libcpp_mb_cur_max_l(__l);
}
#if _LIBCPP_HAS_WIDE_CHARACTERS
// Every wrapper in this group passes its arguments, unchanged and in order, to the
// __libcpp_*_l function of the corresponding name and returns that function's result.

// Forwards to __libcpp_btowc_l().
inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __c, __locale_t __l) {
  return __libcpp_btowc_l(__c, __l);
}

// Forwards to __libcpp_wctob_l().
inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __c, __locale_t __l) {
  return __libcpp_wctob_l(__c, __l);
}

// Forwards to __libcpp_wcsnrtombs_l().
inline _LIBCPP_HIDE_FROM_ABI size_t
__wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __state, __locale_t __l) {
  return __libcpp_wcsnrtombs_l(__dest, __src, __nwc, __len, __state, __l);
}

// Forwards to __libcpp_wcrtomb_l().
inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __c, mbstate_t* __state, __locale_t __l) {
  return __libcpp_wcrtomb_l(__s, __c, __state, __l);
}

// Forwards to __libcpp_mbsnrtowcs_l().
inline _LIBCPP_HIDE_FROM_ABI size_t
__mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __state, __locale_t __l) {
  return __libcpp_mbsnrtowcs_l(__dest, __src, __nms, __len, __state, __l);
}

// Forwards to __libcpp_mbrtowc_l().
inline _LIBCPP_HIDE_FROM_ABI size_t
__mbrtowc(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __state, __locale_t __l) {
  return __libcpp_mbrtowc_l(__pwc, __s, __n, __state, __l);
}

// Forwards to __libcpp_mbtowc_l().
inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __n, __locale_t __l) {
  return __libcpp_mbtowc_l(__pwc, __pmb, __n, __l);
}

// Forwards to __libcpp_mbrlen_l().
inline _LIBCPP_HIDE_FROM_ABI size_t __mbrlen(const char* __s, size_t __n, mbstate_t* __state, __locale_t __l) {
  return __libcpp_mbrlen_l(__s, __n, __state, __l);
}

// Forwards to __libcpp_mbsrtowcs_l().
inline _LIBCPP_HIDE_FROM_ABI size_t
__mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __state, __locale_t __l) {
  return __libcpp_mbsrtowcs_l(__dest, __src, __len, __state, __l);
}
#endif

// Formatted I/O wrappers. These are variadic templates that perfectly forward their
// arguments to the corresponding __libcpp_*_l function.
//
// The diagnostic state is saved here and restored by _LIBCPP_DIAGNOSTIC_POP below, so the
// suppressions only cover the three templates in between.
_LIBCPP_DIAGNOSTIC_PUSH
// NOTE(review): presumably silenced because the format attribute on a variadic template is
// something GCC does not accept (see the comment on the next line) — confirm.
_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat")
_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates
// Helper macro: expands to _LIBCPP_ATTRIBUTE_FORMAT(...) on Clang-based compilers and to
// nothing elsewhere. It is #undef'd after the last use so it does not leak out of this header.
#ifdef _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__)
#else
# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */
#endif

// Forwards to __libcpp_snprintf_l(). The attribute arguments (4, 5) match the parameter list:
// __format is the 4th parameter and the variadic arguments start at the 5th.
template <class... _Args>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf(
char* __s, size_t __n, __locale_t __loc, const char* __format, _Args&&... __args) {
return __libcpp_snprintf_l(__s, __n, __loc, __format, std::forward<_Args>(__args)...);
}
// Forwards to __libcpp_asprintf_l(). __format is the 3rd parameter and the variadic
// arguments start at the 4th, hence the attribute arguments (3, 4).
template <class... _Args>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(
char** __s, __locale_t __loc, const char* __format, _Args&&... __args) {
return __libcpp_asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
}
// Forwards to __libcpp_sscanf_l(). Uses the __scanf__ format archetype; __format is the
// 3rd parameter and the variadic arguments start at the 4th.
template <class... _Args>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf(
const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) {
return __libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
}
_LIBCPP_DIAGNOSTIC_POP
#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT

} // namespace __locale
_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_H
8 changes: 8 additions & 0 deletions libcxx/include/__locale_dir/locale_base_api/apple.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H
#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H

#include <__config>
#include <ctype.h>
#include <string.h>
#include <time.h>
#if _LIBCPP_HAS_WIDE_CHARACTERS
# include <wctype.h>
#endif

#include <xlocale.h>

#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __l
return ::sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...);
}
_LIBCPP_DIAGNOSTIC_POP
#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT

_LIBCPP_END_NAMESPACE_STD

Expand Down
Loading

0 comments on commit 5d8be4c

Please sign in to comment.