You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

174 lines
5.0 KiB

<?php
/**
* @copyright Copyright (c) 2008 Yii Software LLC
* @link http://www.yiiframework.com/
* @license http://www.yiiframework.com/license/
*/
namespace yii\helpers;
use Yii;
/**
* BaseTransliteratorHelper provides concrete implementation for [[TransliteratorHelper]].
*
* Do not use BaseTransliteratorHelper. Use [[TransliteratorHelper]] instead.
*
* @author Antonio Ramirez <amigo.cobos@gmail.com>
* @since 2.0
*/
class BaseTransliteratorHelper
{
    /**
     * Transliterates UTF-8 encoded text to US-ASCII.
     *
     * If the 'intl' extension is loaded, the text is handed to `transliterator_transliterate()` with a
     * fixed rule set; `$unknown` and `$language` are not consulted on that path. If 'intl' is not loaded,
     * or it fails to transliterate the input (for example because the input is not valid UTF-8), the
     * method falls back to decoding the string byte by byte and replacing each Unicode code point
     * through [[replace()]].
     *
     * @param string $string the UTF-8 encoded string.
     * @param string $unknown replacement string for characters that do not have a suitable ASCII equivalent
     * (used by the fallback implementation only).
     * @param string $language optional ISO 639 language code that denotes the language of the input and
     * is used to apply language-specific variations. Otherwise the current display language will be used
     * (used by the fallback implementation only).
     * @return string the transliterated text
     */
    public static function process($string, $unknown = '?', $language = null)
    {
        if (extension_loaded('intl') === true) {
            $options = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC;';
            $transliterated = transliterator_transliterate($options, $string);
            // transliterator_transliterate() yields false on failure; never leak that to the caller,
            // use the byte-level fallback below instead.
            if (is_string($transliterated)) {
                return $transliterated;
            }
        }

        // Pure ASCII input needs no work at all.
        if (!preg_match('/[\x80-\xff]/', $string)) {
            return $string;
        }

        // Lookup table: byte => number of tail bytes that must follow it when it is a UTF-8 head byte.
        // ASCII bytes, tail bytes (0x80-0xbf) and the bytes 0xfe/0xff map to 0.
        static $tailBytes;
        if (!isset($tailBytes)) {
            $tailBytes = [];
            for ($n = 0; $n < 256; $n++) {
                if ($n < 0xc0) {
                    $remaining = 0;
                } elseif ($n < 0xe0) {
                    $remaining = 1;
                } elseif ($n < 0xf0) {
                    $remaining = 2;
                } elseif ($n < 0xf8) {
                    $remaining = 3;
                } elseif ($n < 0xfc) {
                    $remaining = 4;
                } elseif ($n < 0xfe) {
                    $remaining = 5;
                } else {
                    $remaining = 0;
                }
                $tailBytes[chr($n)] = $remaining;
            }
        }

        // Split the text into pure-ASCII runs and runs that start with a non-ASCII byte. The second
        // kind may also swallow ASCII bytes that are not letters, so it is examined byte by byte.
        preg_match_all('/[\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*/', $string, $matches);

        $result = [];
        foreach ($matches[0] as $str) {
            if ($str[0] < "\x80") {
                // ASCII run: copy verbatim.
                $result[] = $str;
                continue;
            }

            $length = strlen($str);
            $i = 0;
            while ($i < $length) {
                $c = $str[$i];
                $remaining = $tailBytes[$c];

                if ($remaining === 0) {
                    // Either a plain ASCII byte (kept as is), or a byte that cannot start a
                    // sequence: a stray tail byte or 0xfe/0xff (replaced by $unknown).
                    $result[] = $c < "\x80" ? $c : $unknown;
                    $i++;
                    continue;
                }

                // Head byte: keep its payload bits (5 bits for one tail byte, down to 1 bit for five),
                // then append 6 payload bits for every valid tail byte.
                $ord = ord($c) & (0x3f >> $remaining);
                do {
                    $i++;
                    if ($i === $length) {
                        // The chunk ended in the middle of a sequence.
                        $result[] = $unknown;
                        break 2;
                    }
                    $c = $str[$i];
                    if ($c < "\x80" || $c >= "\xc0") {
                        // Not a tail byte: give up on this sequence and re-examine the current byte,
                        // which may itself be ASCII or the head of another sequence.
                        $result[] = $unknown;
                        continue 2;
                    }
                    $ord = ($ord << 6) | (ord($c) & 0x3f);
                } while (--$remaining);

                $result[] = static::replace($ord, $unknown, $language);
                $i++;
            }
        }

        return implode('', $result);
    }

    /**
     * Looks up the ASCII replacement for a single Unicode code point.
     *
     * Replacement tables are read from `transliteration/data/xNN.php` next to this file, where `NN` is
     * the code point shifted right by 8 bits, in hexadecimal. Each file is expected to define `$base`
     * (low byte of the code point => replacement) and may define `$variant` (language => overrides).
     * Loaded tables are cached per bank and language for the rest of the request.
     *
     * @param int $ord an ordinal Unicode character code
     * @param string $unknown a replacement string for characters that do not have a suitable ASCII equivalent
     * @param string $language optional ISO 639 language code that specifies the language of the input and is used
     * to apply language-specific variations. If omitted, the application language is used, cut off at its
     * first dash (e.g. `de-DE` becomes `de`).
     * @return string the ASCII replacement character
     */
    public static function replace($ord, $unknown = '?', $language = null)
    {
        static $map = [];

        if (!isset($language)) {
            $language = Yii::$app->language;
            $dash = strpos($language, '-');
            if ($dash) {
                $language = substr($language, 0, $dash);
            }
        }

        $key = $ord >> 8;
        if (!isset($map[$key][$language])) {
            $file = __DIR__ . DIRECTORY_SEPARATOR .
                'transliteration' . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR .
                sprintf('x%02x', $key) . '.php';

            // Defaults keep the lookup well-defined when the data file is missing or does not
            // define one of the variables it is expected to provide.
            $base = [];
            $variant = [];
            if (file_exists($file)) {
                // The included file populates $base and, optionally, $variant.
                include $file;
            }

            if ($language !== 'en' && isset($variant[$language])) {
                // Language-specific entries take precedence over the base table.
                $map[$key][$language] = $variant[$language] + $base;
            } else {
                $map[$key][$language] = $base;
            }
        }

        $ord = $ord & 255;

        return isset($map[$key][$language][$ord]) ? $map[$key][$language][$ord] : $unknown;
    }
}