You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
174 lines
5.0 KiB
174 lines
5.0 KiB
11 years ago
|
<?php
|
||
|
/**
|
||
|
* @copyright Copyright (c) 2008 Yii Software LLC
|
||
|
* @link http://www.yiiframework.com/
|
||
|
* @license http://www.yiiframework.com/license/
|
||
|
*/
|
||
|
|
||
|
namespace yii\helpers;
|
||
|
|
||
|
use Yii;
|
||
|
|
||
|
/**
 * BaseTransliteratorHelper provides concrete implementation for [[TransliteratorHelper]].
 *
 * Do not use BaseTransliteratorHelper. Use [[TransliteratorHelper]] instead.
 *
 * @author Antonio Ramirez <amigo.cobos@gmail.com>
 * @since 2.0
 */
class BaseTransliteratorHelper
{
    /**
     * Transliterates UTF-8 encoded text to US-ASCII.
     *
     * If the 'intl' extension is loaded it is used to transliterate the string. In that case
     * `$unknown` and `$language` are not consulted. When 'intl' is not available, or when it
     * fails to produce a string (for example because the input is not well-formed UTF-8), the
     * method falls back on Unicode character code replacement via [[replace()]].
     *
     * @param string $string the UTF-8 encoded string.
     * @param string $unknown replacement string for characters that do not have a suitable ASCII equivalent
     * @param string $language optional ISO 639 language code that denotes the language of the input and
     * is used to apply language-specific variations. Otherwise the current display language will be used.
     * @return string the transliterated text
     */
    public static function process($string, $unknown = '?', $language = null)
    {
        // Prefer the intl extension when it is loaded.
        if (extension_loaded('intl')) {
            $options = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC;';
            $transliterated = transliterator_transliterate($options, $string);
            if (is_string($transliterated)) {
                return $transliterated;
            }
            // intl reported a failure (returned false): continue with the byte-level
            // fallback below so that the caller still receives a string.
        }

        // Nothing to do for pure 7-bit ASCII input.
        if (!preg_match('/[\x80-\xff]/', $string)) {
            return $string;
        }

        // Split the input into runs of ASCII bytes and chunks that start with a non-ASCII byte.
        preg_match_all('/[\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*/', $string, $matches);

        $result = [];
        foreach ($matches[0] as $chunk) {
            if ($chunk[0] < "\x80") {
                // ASCII run: copy through untouched.
                $result[] = $chunk;
            } else {
                $result[] = self::transliterateChunk($chunk, $unknown, $language);
            }
        }

        return implode('', $result);
    }

    /**
     * Returns the ASCII replacement for a single Unicode character code.
     *
     * Replacement tables are loaded on demand from `transliteration/data/xNN.php` (next to this
     * file), where `NN` is the hexadecimal value of `$ord >> 8`, and are cached per table and
     * language for the rest of the request. The data file is expected to define `$base` and,
     * optionally, `$variant`.
     *
     * @param int $ord an ordinal Unicode character code
     * @param string $unknown a replacement string for characters that do not have a suitable ASCII equivalent
     * @param string $language optional ISO 639 language code that specifies the language of the input and is used
     * to apply language-specific variations. If not set, the application language (`Yii::$app->language`,
     * truncated at the first dash) is used.
     * @return string the ASCII replacement character
     */
    public static function replace($ord, $unknown = '?', $language = null)
    {
        static $map = [];

        if (!isset($language)) {
            $language = Yii::$app->language;
            $dash = strpos($language, '-');
            // Truthiness check on purpose: both "no dash" (false) and "dash at position 0"
            // leave the language untouched.
            if ($dash) {
                $language = substr($language, 0, $dash);
            }
        }

        // The high bits select the data file, the low 8 bits select the entry inside it.
        $key = $ord >> 8;

        if (!isset($map[$key][$language])) {
            $file = __DIR__ . DIRECTORY_SEPARATOR .
                'transliteration' . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR .
                sprintf('x%02x', $key) . '.php';

            // Defaults guard against a missing file and against a data file that does not
            // define one of the two variables.
            $base = [];
            $variant = [];
            if (file_exists($file)) {
                // The data file populates $base and $variant in this scope.
                include $file;
            }

            if ($language !== 'en' && isset($variant[$language])) {
                // Array union: language-specific entries take precedence over the base table.
                $map[$key][$language] = $variant[$language] + $base;
            } else {
                $map[$key][$language] = $base;
            }
        }

        $ord = $ord & 255;

        return isset($map[$key][$language][$ord]) ? $map[$key][$language][$ord] : $unknown;
    }

    /**
     * Transliterates one chunk that starts with a non-ASCII byte, byte by byte.
     *
     * Malformed input (stray continuation bytes, the bytes 0xFE/0xFF, truncated or interrupted
     * multi-byte sequences) is replaced by `$unknown`.
     *
     * @param string $chunk the chunk of bytes to process
     * @param string $unknown replacement string for characters without an ASCII equivalent
     * @param string $language optional language code passed through to [[replace()]]
     * @return string the transliterated chunk
     */
    private static function transliterateChunk($chunk, $unknown, $language)
    {
        $tailBytes = self::tailByteCounts();
        $length = strlen($chunk);
        $result = '';
        $i = 0;

        while ($i < $length) {
            $byte = $chunk[$i++];
            $remaining = $tailBytes[ord($byte)];

            if ($remaining === 0) {
                // Either a plain ASCII byte, or a byte that cannot start a sequence.
                $result .= $byte < "\x80" ? $byte : $unknown;
                continue;
            }

            // Lead byte: collect the expected number of continuation bytes.
            $sequence = $byte;
            for (; $remaining > 0; --$remaining) {
                if ($i >= $length) {
                    // The chunk ended in the middle of a sequence.
                    return $result . $unknown;
                }
                $next = $chunk[$i];
                if ($next < "\x80" || $next >= "\xc0") {
                    // Not a continuation byte: give up on this sequence and let the outer
                    // loop process the offending byte as the start of a new character.
                    $result .= $unknown;
                    continue 2;
                }
                $sequence .= $next;
                ++$i;
            }

            $result .= static::replace(self::decodeCodePoint($sequence), $unknown, $language);
        }

        return $result;
    }

    /**
     * Returns a table mapping every byte value (0-255) to the number of continuation bytes
     * that must follow it when it is used as the first byte of a sequence.
     *
     * @return array list of 256 integers indexed by byte value
     */
    private static function tailByteCounts()
    {
        static $table;

        if ($table === null) {
            $table = [];
            for ($n = 0; $n < 256; $n++) {
                if ($n < 0xc0) {
                    $table[$n] = 0; // ASCII or continuation byte
                } elseif ($n < 0xe0) {
                    $table[$n] = 1;
                } elseif ($n < 0xf0) {
                    $table[$n] = 2;
                } elseif ($n < 0xf8) {
                    $table[$n] = 3;
                } elseif ($n < 0xfc) {
                    $table[$n] = 4;
                } elseif ($n < 0xfe) {
                    $table[$n] = 5;
                } else {
                    $table[$n] = 0; // 0xFE and 0xFF never start a sequence
                }
            }
        }

        return $table;
    }

    /**
     * Decodes a complete multi-byte sequence (lead byte plus continuation bytes) into its
     * character code.
     *
     * @param string $sequence a sequence of 2 to 6 bytes as collected by [[transliterateChunk()]]
     * @return int the decoded character code
     */
    private static function decodeCodePoint($sequence)
    {
        $length = strlen($sequence);
        // A lead byte of an N-byte sequence carries its payload in the low (7 - N) bits.
        $codePoint = ord($sequence[0]) & (0xff >> ($length + 1));
        for ($i = 1; $i < $length; ++$i) {
            // Every continuation byte contributes its low 6 bits.
            $codePoint = ($codePoint << 6) | (ord($sequence[$i]) & 0x3f);
        }

        return $codePoint;
    }
}
|