You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
			
				
					174 lines
				
				5.0 KiB
			
		
		
			
		
	
	
					174 lines
				
				5.0 KiB
			| 
											12 years ago
										 | <?php
 | ||
|  | /**
 | ||
|  |  * @copyright Copyright (c) 2008 Yii Software LLC
 | ||
|  |  * @link http://www.yiiframework.com/
 | ||
|  |  * @license http://www.yiiframework.com/license/
 | ||
|  |  */
 | ||
|  | 
 | ||
|  | namespace yii\helpers;
 | ||
|  | 
 | ||
|  | use Yii;
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * BaseTransliteratorHelper provides concrete implementation for [[TransliteratorHelper]].
 | ||
|  |  *
 | ||
|  |  * Do not use BaseTransliteratorHelper. Use [[TransliteratorHelper]] instead.
 | ||
|  |  *
 | ||
|  |  * @author Antonio Ramirez <amigo.cobos@gmail.com>
 | ||
|  |  * @since 2.0
 | ||
|  |  */
 | ||
class BaseTransliteratorHelper
{
	/**
	 * Transliterates UTF-8 encoded text to US-ASCII.
	 *
	 * If the 'intl' extension is loaded, the string is transliterated with ICU. When 'intl' is not
	 * available, or when ICU reports a failure (for example because the input is not valid UTF-8),
	 * the method falls back on Unicode character code replacement through [[replace()]].
	 *
	 * Note that `$unknown` and `$language` are only used by the fallback implementation.
	 *
	 * @param string $string the UTF-8 encoded string.
	 * @param string $unknown replacement string for characters that do not have a suitable ASCII equivalent
	 * @param string $language optional ISO 639 language code that denotes the language of the input and
	 * is used to apply language-specific variations. Otherwise the current display language will be used.
	 * @return string the transliterated text
	 */
	public static function process($string, $unknown = '?', $language = null)
	{
		if (extension_loaded('intl') === true) {
			$options = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC;';
			$transliterated = transliterator_transliterate($options, $string);
			if ($transliterated !== false) {
				return $transliterated;
			}
			// ICU could not process the input; use the byte-level fallback below so that the
			// caller still receives a string instead of `false`.
		}

		// Nothing to do when the string contains no byte outside the 7-bit ASCII range.
		if (!preg_match('/[\x80-\xff]/', $string)) {
			return $string;
		}

		// Lookup table: for every possible byte, the number of continuation bytes that have to
		// follow it when it is used as the lead byte of a multi-byte sequence (0 = not a lead byte).
		static $tailBytes;

		if (!isset($tailBytes)) {
			$tailBytes = [];
			for ($n = 0; $n < 256; $n++) {
				if ($n < 0xc0) {
					$remaining = 0;
				} elseif ($n < 0xe0) {
					$remaining = 1;
				} elseif ($n < 0xf0) {
					$remaining = 2;
				} elseif ($n < 0xf8) {
					$remaining = 3;
				} elseif ($n < 0xfc) {
					$remaining = 4;
				} elseif ($n < 0xfe) {
					$remaining = 5;
				} else {
					$remaining = 0;
				}
				$tailBytes[chr($n)] = $remaining;
			}
		}

		// Split the input into runs of plain ASCII and chunks that start with a high byte. A high-byte
		// chunk also swallows following bytes other than ASCII letters (and the backtick).
		preg_match_all('/[\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*/', $string, $matches);

		$result = [];
		foreach ($matches[0] as $str) {
			// Pure ASCII run: copy it through unchanged.
			if ($str[0] < "\x80") {
				$result[] = $str;
				continue;
			}

			$head = '';
			// $len counts the bytes that are still unread, plus one (it is pre-decremented before each read).
			$len = strlen($str) + 1;
			for ($i = -1; --$len;) {
				$c = $str[++$i];
				if ($remaining = $tailBytes[$c]) {
					// Lead byte of a multi-byte sequence: collect its continuation bytes.
					$sequence = $head = $c;
					do {
						if (--$len && ($c = $str[++$i]) >= "\x80" && $c < "\xc0") {
							$sequence .= $c;
						} else {
							if ($len == 0) {
								// The chunk ended in the middle of a sequence.
								$result[] = $unknown;
								break 2;
							} else {
								// Unexpected byte inside a sequence: emit the replacement and
								// re-read the offending byte as the start of something new.
								$result[] = $unknown;
								--$i;
								++$len;
								continue 2;
							}
						}
					} while (--$remaining);

					// Decode the collected bytes into a Unicode code point.
					$n = ord($head);
					if ($n <= 0xdf) {
						$ord = ($n - 192) * 64 + (ord($sequence[1]) - 128);
					} elseif ($n <= 0xef) {
						$ord = ($n - 224) * 4096 + (ord($sequence[1]) - 128) * 64 + (ord($sequence[2]) - 128);
					} elseif ($n <= 0xf7) {
						$ord = ($n - 240) * 262144 + (ord($sequence[1]) - 128) * 4096 +
							(ord($sequence[2]) - 128) * 64 + (ord($sequence[3]) - 128);
					} elseif ($n <= 0xfb) {
						$ord = ($n - 248) * 16777216 + (ord($sequence[1]) - 128) * 262144 +
							(ord($sequence[2]) - 128) * 4096 + (ord($sequence[3]) - 128) * 64 + (ord($sequence[4]) - 128);
					} else {
						// Only lead bytes up to 0xfd have a non-zero entry in $tailBytes,
						// so this branch handles 0xfc and 0xfd.
						$ord = ($n - 252) * 1073741824 + (ord($sequence[1]) - 128) * 16777216 +
							(ord($sequence[2]) - 128) * 262144 + (ord($sequence[3]) - 128) * 4096 +
							(ord($sequence[4]) - 128) * 64 + (ord($sequence[5]) - 128);
					}
					$result[] = static::replace($ord, $unknown, $language);
					$head = '';
				} elseif ($c < "\x80") {
					// ASCII byte inside a high-byte chunk.
					$result[] = $c;
					$head = '';
				} elseif ($c < "\xc0") {
					// Continuation byte that does not belong to any sequence.
					if ($head == '') {
						$result[] = $unknown;
					}
				} else {
					// 0xfe / 0xff are not treated as lead bytes.
					$result[] = $unknown;
					$head = '';
				}
			}
		}
		return implode('', $result);
	}

	/**
	 * Returns the ASCII replacement for a single Unicode character code.
	 *
	 * Replacement tables are loaded on demand from `transliteration/data/xNN.php` next to this file,
	 * where `NN` is the character code shifted right by 8 bits in hexadecimal, and are cached per
	 * table and language for the remainder of the request.
	 *
	 * @param int $ord an ordinal Unicode character code
	 * @param string $unknown a replacement string for characters that do not have a suitable ASCII equivalent
	 * @param string $language optional ISO 639 language code that specifies the language of the input and is used
	 * to apply language-specific variations. Otherwise the current display language will be used.
	 * @return string the ASCII replacement character
	 */
	public static function replace($ord, $unknown = '?', $language = null)
	{
		static $map = [];

		if (!isset($language)) {
			$language = Yii::$app->language;
			// Reduce a code such as "en-US" to its language part.
			$dash = strpos($language, '-');
			if ($dash) {
				$language = substr($language, 0, $dash);
			}
		}

		$key = $ord >> 8;

		if (!isset($map[$key][$language])) {
			$file = __DIR__ . DIRECTORY_SEPARATOR .
				'transliteration' . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR .
				sprintf('x%02x', $key) . '.php';

			// The data file is expected to assign $base and, optionally, $variant in this scope.
			// Both start out empty so that a missing file or a file that omits one of them
			// still yields a cacheable array.
			$base = [];
			$variant = [];
			if (is_file($file)) {
				include $file;
			}

			if ($language !== 'en' && isset($variant[$language])) {
				// Language-specific entries take precedence over the base table.
				$map[$key][$language] = $variant[$language] + $base;
			} else {
				$map[$key][$language] = $base;
			}
		}

		$ord = $ord & 255;

		return isset($map[$key][$language][$ord]) ? $map[$key][$language][$ord] : $unknown;
	}
}
 |