diff --git a/includes/wikia/VariablesBase.php b/includes/wikia/VariablesBase.php index f2a4a1ae1225..ed09c4c7f1bd 100644 --- a/includes/wikia/VariablesBase.php +++ b/includes/wikia/VariablesBase.php @@ -8999,3 +8999,328 @@ * @var $wgEnableHydralyticsExt */ $wgEnableHydralyticsExt = true; + +/** + * List of Unicode characters for which capitalization is overridden in + * Language::ucfirst. The characters should be + * represented as char_to_convert => conversion_override. See T219279 for details + * on why this is useful during php version transitions. + * + * @warning: EXPERIMENTAL! + * + * @var array + */ +$wgOverrideUcfirstCharacters = [ + 'ß' => 'ß', + 'ʼn' => 'ʼn', + 'Dž' => 'Dž', + 'dž' => 'Dž', + 'Lj' => 'Lj', + 'lj' => 'Lj', + 'Nj' => 'Nj', + 'nj' => 'Nj', + 'ǰ' => 'ǰ', + 'Dz' => 'Dz', + 'dz' => 'Dz', + 'ɪ' => 'ɪ', + 'ͅ' => 'ͅ', + 'ΐ' => 'ΐ', + 'ΰ' => 'ΰ', + 'և' => 'և', + 'ა' => 'ა', + 'ბ' => 'ბ', + 'გ' => 'გ', + 'დ' => 'დ', + 'ე' => 'ე', + 'ვ' => 'ვ', + 'ზ' => 'ზ', + 'თ' => 'თ', + 'ი' => 'ი', + 'კ' => 'კ', + 'ლ' => 'ლ', + 'მ' => 'მ', + 'ნ' => 'ნ', + 'ო' => 'ო', + 'პ' => 'პ', + 'ჟ' => 'ჟ', + 'რ' => 'რ', + 'ს' => 'ს', + 'ტ' => 'ტ', + 'უ' => 'უ', + 'ფ' => 'ფ', + 'ქ' => 'ქ', + 'ღ' => 'ღ', + 'ყ' => 'ყ', + 'შ' => 'შ', + 'ჩ' => 'ჩ', + 'ც' => 'ც', + 'ძ' => 'ძ', + 'წ' => 'წ', + 'ჭ' => 'ჭ', + 'ხ' => 'ხ', + 'ჯ' => 'ჯ', + 'ჰ' => 'ჰ', + 'ჱ' => 'ჱ', + 'ჲ' => 'ჲ', + 'ჳ' => 'ჳ', + 'ჴ' => 'ჴ', + 'ჵ' => 'ჵ', + 'ჶ' => 'ჶ', + 'ჷ' => 'ჷ', + 'ჸ' => 'ჸ', + 'ჹ' => 'ჹ', + 'ჺ' => 'ჺ', + 'ჽ' => 'ჽ', + 'ჾ' => 'ჾ', + 'ჿ' => 'ჿ', + 'ᲀ' => 'ᲀ', + 'ᲁ' => 'ᲁ', + 'ᲂ' => 'ᲂ', + 'ᲃ' => 'ᲃ', + 'ᲄ' => 'ᲄ', + 'ᲅ' => 'ᲅ', + 'ᲆ' => 'ᲆ', + 'ᲇ' => 'ᲇ', + 'ᲈ' => 'ᲈ', + 'ẖ' => 'ẖ', + 'ẗ' => 'ẗ', + 'ẘ' => 'ẘ', + 'ẙ' => 'ẙ', + 'ẚ' => 'ẚ', + 'ὐ' => 'ὐ', + 'ὒ' => 'ὒ', + 'ὔ' => 'ὔ', + 'ὖ' => 'ὖ', + 'ᾀ' => 'ᾈ', + 'ᾁ' => 'ᾉ', + 'ᾂ' => 'ᾊ', + 'ᾃ' => 'ᾋ', + 'ᾄ' => 'ᾌ', + 'ᾅ' => 'ᾍ', + 'ᾆ' => 'ᾎ', + 'ᾇ' => 'ᾏ', + 'ᾈ' => 'ᾈ', + 'ᾉ' => 'ᾉ', + 'ᾊ' => 'ᾊ', + 'ᾋ' => 'ᾋ', + 'ᾌ' => 'ᾌ', + 'ᾍ' => 'ᾍ', + 'ᾎ' => 'ᾎ', + 
'ᾏ' => 'ᾏ', + 'ᾐ' => 'ᾘ', + 'ᾑ' => 'ᾙ', + 'ᾒ' => 'ᾚ', + 'ᾓ' => 'ᾛ', + 'ᾔ' => 'ᾜ', + 'ᾕ' => 'ᾝ', + 'ᾖ' => 'ᾞ', + 'ᾗ' => 'ᾟ', + 'ᾘ' => 'ᾘ', + 'ᾙ' => 'ᾙ', + 'ᾚ' => 'ᾚ', + 'ᾛ' => 'ᾛ', + 'ᾜ' => 'ᾜ', + 'ᾝ' => 'ᾝ', + 'ᾞ' => 'ᾞ', + 'ᾟ' => 'ᾟ', + 'ᾠ' => 'ᾨ', + 'ᾡ' => 'ᾩ', + 'ᾢ' => 'ᾪ', + 'ᾣ' => 'ᾫ', + 'ᾤ' => 'ᾬ', + 'ᾥ' => 'ᾭ', + 'ᾦ' => 'ᾮ', + 'ᾧ' => 'ᾯ', + 'ᾨ' => 'ᾨ', + 'ᾩ' => 'ᾩ', + 'ᾪ' => 'ᾪ', + 'ᾫ' => 'ᾫ', + 'ᾬ' => 'ᾬ', + 'ᾭ' => 'ᾭ', + 'ᾮ' => 'ᾮ', + 'ᾯ' => 'ᾯ', + 'ᾲ' => 'ᾲ', + 'ᾳ' => 'ᾼ', + 'ᾴ' => 'ᾴ', + 'ᾶ' => 'ᾶ', + 'ᾷ' => 'ᾷ', + 'ᾼ' => 'ᾼ', + 'ῂ' => 'ῂ', + 'ῃ' => 'ῌ', + 'ῄ' => 'ῄ', + 'ῆ' => 'ῆ', + 'ῇ' => 'ῇ', + 'ῌ' => 'ῌ', + 'ῒ' => 'ῒ', + 'ΐ' => 'ΐ', + 'ῖ' => 'ῖ', + 'ῗ' => 'ῗ', + 'ῢ' => 'ῢ', + 'ΰ' => 'ΰ', + 'ῤ' => 'ῤ', + 'ῦ' => 'ῦ', + 'ῧ' => 'ῧ', + 'ῲ' => 'ῲ', + 'ῳ' => 'ῼ', + 'ῴ' => 'ῴ', + 'ῶ' => 'ῶ', + 'ῷ' => 'ῷ', + 'ῼ' => 'ῼ', + 'ⅰ' => 'ⅰ', + 'ⅱ' => 'ⅱ', + 'ⅲ' => 'ⅲ', + 'ⅳ' => 'ⅳ', + 'ⅴ' => 'ⅴ', + 'ⅵ' => 'ⅵ', + 'ⅶ' => 'ⅶ', + 'ⅷ' => 'ⅷ', + 'ⅸ' => 'ⅸ', + 'ⅹ' => 'ⅹ', + 'ⅺ' => 'ⅺ', + 'ⅻ' => 'ⅻ', + 'ⅼ' => 'ⅼ', + 'ⅽ' => 'ⅽ', + 'ⅾ' => 'ⅾ', + 'ⅿ' => 'ⅿ', + 'ⓐ' => 'ⓐ', + 'ⓑ' => 'ⓑ', + 'ⓒ' => 'ⓒ', + 'ⓓ' => 'ⓓ', + 'ⓔ' => 'ⓔ', + 'ⓕ' => 'ⓕ', + 'ⓖ' => 'ⓖ', + 'ⓗ' => 'ⓗ', + 'ⓘ' => 'ⓘ', + 'ⓙ' => 'ⓙ', + 'ⓚ' => 'ⓚ', + 'ⓛ' => 'ⓛ', + 'ⓜ' => 'ⓜ', + 'ⓝ' => 'ⓝ', + 'ⓞ' => 'ⓞ', + 'ⓟ' => 'ⓟ', + 'ⓠ' => 'ⓠ', + 'ⓡ' => 'ⓡ', + 'ⓢ' => 'ⓢ', + 'ⓣ' => 'ⓣ', + 'ⓤ' => 'ⓤ', + 'ⓥ' => 'ⓥ', + 'ⓦ' => 'ⓦ', + 'ⓧ' => 'ⓧ', + 'ⓨ' => 'ⓨ', + 'ⓩ' => 'ⓩ', + 'ꞹ' => 'ꞹ', + 'ff' => 'ff', + 'fi' => 'fi', + 'fl' => 'fl', + 'ffi' => 'ffi', + 'ffl' => 'ffl', + 'ſt' => 'ſt', + 'st' => 'st', + 'ﬓ' => 'ﬓ', + 'ﬔ' => 'ﬔ', + 'ﬕ' => 'ﬕ', + 'ﬖ' => 'ﬖ', + 'ﬗ' => 'ﬗ', + '𐓘' => '𐓘', + '𐓙' => '𐓙', + '𐓚' => '𐓚', + '𐓛' => '𐓛', + '𐓜' => '𐓜', + '𐓝' => '𐓝', + '𐓞' => '𐓞', + '𐓟' => '𐓟', + '𐓠' => '𐓠', + '𐓡' => '𐓡', + '𐓢' => '𐓢', + '𐓣' => '𐓣', + '𐓤' => '𐓤', + '𐓥' => '𐓥', + '𐓦' => '𐓦', + '𐓧' => '𐓧', + '𐓨' => '𐓨', + '𐓩' => '𐓩', + '𐓪' => '𐓪', + '𐓫' => '𐓫', + '𐓬' => '𐓬', + '𐓭' => '𐓭', + '𐓮' => '𐓮', + '𐓯' => '𐓯', + '𐓰' => '𐓰', + '𐓱' => '𐓱', + '𐓲' => 
'𐓲', + '𐓳' => '𐓳', + '𐓴' => '𐓴', + '𐓵' => '𐓵', + '𐓶' => '𐓶', + '𐓷' => '𐓷', + '𐓸' => '𐓸', + '𐓹' => '𐓹', + '𐓺' => '𐓺', + '𐓻' => '𐓻', + '𖹠' => '𖹠', + '𖹡' => '𖹡', + '𖹢' => '𖹢', + '𖹣' => '𖹣', + '𖹤' => '𖹤', + '𖹥' => '𖹥', + '𖹦' => '𖹦', + '𖹧' => '𖹧', + '𖹨' => '𖹨', + '𖹩' => '𖹩', + '𖹪' => '𖹪', + '𖹫' => '𖹫', + '𖹬' => '𖹬', + '𖹭' => '𖹭', + '𖹮' => '𖹮', + '𖹯' => '𖹯', + '𖹰' => '𖹰', + '𖹱' => '𖹱', + '𖹲' => '𖹲', + '𖹳' => '𖹳', + '𖹴' => '𖹴', + '𖹵' => '𖹵', + '𖹶' => '𖹶', + '𖹷' => '𖹷', + '𖹸' => '𖹸', + '𖹹' => '𖹹', + '𖹺' => '𖹺', + '𖹻' => '𖹻', + '𖹼' => '𖹼', + '𖹽' => '𖹽', + '𖹾' => '𖹾', + '𖹿' => '𖹿', + '𞤢' => '𞤢', + '𞤣' => '𞤣', + '𞤤' => '𞤤', + '𞤥' => '𞤥', + '𞤦' => '𞤦', + '𞤧' => '𞤧', + '𞤨' => '𞤨', + '𞤩' => '𞤩', + '𞤪' => '𞤪', + '𞤫' => '𞤫', + '𞤬' => '𞤬', + '𞤭' => '𞤭', + '𞤮' => '𞤮', + '𞤯' => '𞤯', + '𞤰' => '𞤰', + '𞤱' => '𞤱', + '𞤲' => '𞤲', + '𞤳' => '𞤳', + '𞤴' => '𞤴', + '𞤵' => '𞤵', + '𞤶' => '𞤶', + '𞤷' => '𞤷', + '𞤸' => '𞤸', + '𞤹' => '𞤹', + '𞤺' => '𞤺', + '𞤻' => '𞤻', + '𞤼' => '𞤼', + '𞤽' => '𞤽', + '𞤾' => '𞤾', + '𞤿' => '𞤿', + '𞥀' => '𞥀', + '𞥁' => '𞥁', + '𞥂' => '𞥂', + '𞥃' => '𞥃', +]; diff --git a/languages/Language.php b/languages/Language.php index 3cc3456f4eca..081788865283 100644 --- a/languages/Language.php +++ b/languages/Language.php @@ -2168,7 +2168,7 @@ function uc( $str, $first = false ) { if ( function_exists( 'mb_strtoupper' ) ) { if ( $first ) { if ( $this->isMultibyte( $str ) ) { - return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 ); + return $this->mbUpperChar( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 ); } else { return ucfirst( $str ); } @@ -2189,6 +2189,26 @@ function uc( $str, $first = false ) { } } + /** + * Convert character to uppercase, allowing overrides of the default mb_upper + * behaviour, which is buggy in many ways. Having a conversion table can be + * useful during transitions between PHP versions where unicode changes happen. + * This can make some resources unreachable on-wiki, see discussion at T219279. 
+ * Providing such a conversion table makes it possible to manage the transition period. + * + * @param string $char Character to convert; it is looked up as a key in $wgOverrideUcfirstCharacters + * + * @return string The value configured in $wgOverrideUcfirstCharacters when $char is one of its keys, otherwise the result of mb_strtoupper( $char ) + */ + protected function mbUpperChar( $char ) { + global $wgOverrideUcfirstCharacters; + if ( array_key_exists( $char, $wgOverrideUcfirstCharacters ) ) { + return $wgOverrideUcfirstCharacters[$char]; + } else { + return mb_strtoupper( $char ); + } + } + /** * @param $str string * @return mixed|string