Skip to content
This repository has been archived by the owner on Jun 7, 2023. It is now read-only.

Commit

Permalink
Merge pull request #17426 from Wikia/PLATFORM-4285
Browse files Browse the repository at this point in the history
(PLATFORM-4285) Add overrides for mb_strtoupper in Language::ucfirst
  • Loading branch information
Michal-Chatlas1221 authored Aug 30, 2019
2 parents d60c2d3 + de114ae commit c65b1b2
Show file tree
Hide file tree
Showing 2 changed files with 346 additions and 1 deletion.
325 changes: 325 additions & 0 deletions includes/wikia/VariablesBase.php
Original file line number Diff line number Diff line change
Expand Up @@ -8999,3 +8999,328 @@
* @var $wgEnableHydralyticsExt
*/
$wgEnableHydralyticsExt = true;

/**
* Map of Unicode characters whose capitalization is overridden in
* Language::ucfirst. Each entry has the form
* char_to_convert => conversion_override. See T219279 for details
* on why this is useful during PHP version transitions.
*
* @warning: EXPERIMENTAL!
*
* @var array
*/
$wgOverrideUcfirstCharacters = [
'ß' => 'ß',
'ʼn' => 'ʼn',
'Dž' => 'Dž',
'dž' => 'Dž',
'Lj' => 'Lj',
'lj' => 'Lj',
'Nj' => 'Nj',
'nj' => 'Nj',
'ǰ' => 'ǰ',
'Dz' => 'Dz',
'dz' => 'Dz',
'ɪ' => 'ɪ',
'ͅ' => 'ͅ',
'ΐ' => 'ΐ',
'ΰ' => 'ΰ',
'և' => 'և',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'' => '',
'𐓘' => '𐓘',
'𐓙' => '𐓙',
'𐓚' => '𐓚',
'𐓛' => '𐓛',
'𐓜' => '𐓜',
'𐓝' => '𐓝',
'𐓞' => '𐓞',
'𐓟' => '𐓟',
'𐓠' => '𐓠',
'𐓡' => '𐓡',
'𐓢' => '𐓢',
'𐓣' => '𐓣',
'𐓤' => '𐓤',
'𐓥' => '𐓥',
'𐓦' => '𐓦',
'𐓧' => '𐓧',
'𐓨' => '𐓨',
'𐓩' => '𐓩',
'𐓪' => '𐓪',
'𐓫' => '𐓫',
'𐓬' => '𐓬',
'𐓭' => '𐓭',
'𐓮' => '𐓮',
'𐓯' => '𐓯',
'𐓰' => '𐓰',
'𐓱' => '𐓱',
'𐓲' => '𐓲',
'𐓳' => '𐓳',
'𐓴' => '𐓴',
'𐓵' => '𐓵',
'𐓶' => '𐓶',
'𐓷' => '𐓷',
'𐓸' => '𐓸',
'𐓹' => '𐓹',
'𐓺' => '𐓺',
'𐓻' => '𐓻',
'𖹠' => '𖹠',
'𖹡' => '𖹡',
'𖹢' => '𖹢',
'𖹣' => '𖹣',
'𖹤' => '𖹤',
'𖹥' => '𖹥',
'𖹦' => '𖹦',
'𖹧' => '𖹧',
'𖹨' => '𖹨',
'𖹩' => '𖹩',
'𖹪' => '𖹪',
'𖹫' => '𖹫',
'𖹬' => '𖹬',
'𖹭' => '𖹭',
'𖹮' => '𖹮',
'𖹯' => '𖹯',
'𖹰' => '𖹰',
'𖹱' => '𖹱',
'𖹲' => '𖹲',
'𖹳' => '𖹳',
'𖹴' => '𖹴',
'𖹵' => '𖹵',
'𖹶' => '𖹶',
'𖹷' => '𖹷',
'𖹸' => '𖹸',
'𖹹' => '𖹹',
'𖹺' => '𖹺',
'𖹻' => '𖹻',
'𖹼' => '𖹼',
'𖹽' => '𖹽',
'𖹾' => '𖹾',
'𖹿' => '𖹿',
'𞤢' => '𞤢',
'𞤣' => '𞤣',
'𞤤' => '𞤤',
'𞤥' => '𞤥',
'𞤦' => '𞤦',
'𞤧' => '𞤧',
'𞤨' => '𞤨',
'𞤩' => '𞤩',
'𞤪' => '𞤪',
'𞤫' => '𞤫',
'𞤬' => '𞤬',
'𞤭' => '𞤭',
'𞤮' => '𞤮',
'𞤯' => '𞤯',
'𞤰' => '𞤰',
'𞤱' => '𞤱',
'𞤲' => '𞤲',
'𞤳' => '𞤳',
'𞤴' => '𞤴',
'𞤵' => '𞤵',
'𞤶' => '𞤶',
'𞤷' => '𞤷',
'𞤸' => '𞤸',
'𞤹' => '𞤹',
'𞤺' => '𞤺',
'𞤻' => '𞤻',
'𞤼' => '𞤼',
'𞤽' => '𞤽',
'𞤾' => '𞤾',
'𞤿' => '𞤿',
'𞥀' => '𞥀',
'𞥁' => '𞥁',
'𞥂' => '𞥂',
'𞥃' => '𞥃',
];
22 changes: 21 additions & 1 deletion languages/Language.php
Original file line number Diff line number Diff line change
Expand Up @@ -2168,7 +2168,7 @@ function uc( $str, $first = false ) {
if ( function_exists( 'mb_strtoupper' ) ) {
if ( $first ) {
if ( $this->isMultibyte( $str ) ) {
return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
return $this->mbUpperChar( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
} else {
return ucfirst( $str );
}
Expand All @@ -2189,6 +2189,26 @@ function uc( $str, $first = false ) {
}
}

/**
 * Convert a single character to uppercase, honouring the overrides in
 * $wgOverrideUcfirstCharacters before falling back to mb_strtoupper().
 *
 * The default mb_strtoupper() behaviour is buggy in many ways and changes
 * between PHP versions as Unicode data is updated. Such changes can make some
 * resources unreachable on-wiki (see discussion at T219279); a conversion
 * table makes it possible to manage the transition period.
 *
 * @param string $char Character to convert
 *
 * @return string The override value when $char is a key of the override
 *   table, otherwise the result of mb_strtoupper( $char )
 */
protected function mbUpperChar( $char ) {
	global $wgOverrideUcfirstCharacters;

	// The override table comes from site configuration. When it is missing or
	// not an array, skip the lookup instead of letting array_key_exists()
	// fail, and use the stock conversion.
	if (
		is_array( $wgOverrideUcfirstCharacters ) &&
		array_key_exists( $char, $wgOverrideUcfirstCharacters )
	) {
		return $wgOverrideUcfirstCharacters[$char];
	}

	return mb_strtoupper( $char );
}

/**
* @param $str string
* @return mixed|string
Expand Down

0 comments on commit c65b1b2

Please sign in to comment.