You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							174 lines
						
					
					
						
							5.0 KiB
						
					
					
				
			
		
		
	
	
							174 lines
						
					
					
						
							5.0 KiB
						
					
					
				| <?php | |
| /** | |
|  * @copyright Copyright (c) 2008 Yii Software LLC | |
|  * @link http://www.yiiframework.com/ | |
|  * @license http://www.yiiframework.com/license/ | |
|  */ | |
|  | |
| namespace yii\helpers; | |
|  | |
| use Yii; | |
|  | |
| /** | |
|  * BaseTransliteratorHelper provides concrete implementation for [[TransliteratorHelper]]. | |
|  * | |
|  * Do not use BaseTransliteratorHelper. Use [[TransliteratorHelper]] instead. | |
|  * | |
|  * @author Antonio Ramirez <amigo.cobos@gmail.com> | |
|  * @since 2.0 | |
|  */ | |
class BaseTransliteratorHelper
{
	/**
	 * Transliterates UTF-8 encoded text to US-ASCII.
	 *
	 * When the 'intl' extension is loaded the string is handed to ICU's transliterator. That path
	 * does not consult `$unknown` or `$language`. If 'intl' is not loaded, or if ICU rejects the
	 * input (for example because it is not valid UTF-8), the string is decoded byte by byte and each
	 * code point is mapped through [[replace()]].
	 *
	 * @param string $string the UTF-8 encoded string.
	 * @param string $unknown replacement string for characters that do not have a suitable ASCII equivalent
	 * (used by the byte-level fallback only).
	 * @param string $language optional ISO 639 language code that denotes the language of the input and
	 * is used to apply language-specific variations (byte-level fallback only). When omitted,
	 * [[replace()]] derives it from `Yii::$app->language`.
	 * @return string the transliterated text
	 */
	public static function process($string, $unknown = '?', $language = null)
	{
		// Preferred path: let ICU do the work when the 'intl' extension is available.
		if (extension_loaded('intl')) {
			$rules = 'Any-Latin; Latin-ASCII; NFD; [:Nonspacing Mark:] Remove; NFC;';
			$converted = transliterator_transliterate($rules, $string);
			if (is_string($converted)) {
				return $converted;
			}
			// transliterator_transliterate() signals failure by returning false. Fall through to
			// the byte-level fallback so that this method always returns a string.
		}

		// Nothing to do when the input contains no byte outside the ASCII range.
		if (!preg_match('/[\x80-\xff]/', $string)) {
			return $string;
		}

		$tailBytes = self::tailByteTable();

		// Split the input into runs of pure ASCII and chunks that start with a non-ASCII byte
		// followed by any bytes that are not in the ranges 0x41-0x5a / 0x60-0x7a.
		preg_match_all('/[\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*/', $string, $matches);

		$result = [];
		foreach ($matches[0] as $chunk) {
			// ASCII runs are copied through untouched.
			if ($chunk[0] < "\x80") {
				$result[] = $chunk;
				continue;
			}

			$length = strlen($chunk);
			for ($i = 0; $i < $length; $i++) {
				$byte = $chunk[$i];
				$expected = $tailBytes[$byte];

				if ($expected === 0) {
					// Either a plain ASCII byte (copied), or a byte that cannot start a sequence:
					// a stray continuation byte (0x80-0xbf) or 0xfe/0xff (replaced).
					$result[] = $byte < "\x80" ? $byte : $unknown;
					continue;
				}

				// Lead byte: its payload is the low (6 - $expected) bits.
				$ord = ord($byte) & (0x3f >> $expected);
				for ($k = 1; $k <= $expected; $k++) {
					if ($i + $k >= $length) {
						// The chunk ends in the middle of a sequence.
						$result[] = $unknown;
						break 2;
					}
					$next = $chunk[$i + $k];
					if ($next < "\x80" || $next >= "\xc0") {
						// Not a continuation byte: give up on this sequence and re-examine the
						// offending byte on the next iteration of the outer loop.
						$result[] = $unknown;
						$i += $k - 1;
						continue 2;
					}
					// Each continuation byte contributes its low 6 bits.
					$ord = ($ord << 6) | (ord($next) & 0x3f);
				}

				$i += $expected;
				$result[] = static::replace($ord, $unknown, $language);
			}
		}

		return implode('', $result);
	}

	/**
	 * Returns the ASCII replacement for a single Unicode code point.
	 *
	 * The upper bits of `$ord` (`$ord >> 8`) select a data file named `xNN.php` below
	 * `transliteration/data` next to this file; the low 8 bits index into the table that file
	 * provides. Loaded tables are cached per data file and language for the rest of the request.
	 *
	 * @param int $ord an ordinal Unicode character code
	 * @param string $unknown a replacement string for characters that do not have a suitable ASCII equivalent
	 * @param string $language optional ISO 639 language code that specifies the language of the input and is used
	 * to apply language-specific variations. When omitted, `Yii::$app->language` is used with anything from
	 * the first '-' onwards removed.
	 * @return string the ASCII replacement character
	 */
	public static function replace($ord, $unknown = '?', $language = null)
	{
		static $map = [];

		if (!isset($language)) {
			$language = Yii::$app->language;
			$dash = strpos($language, '-');
			// A dash at offset 0 is deliberately left alone, matching the previous behaviour.
			if ($dash !== false && $dash > 0) {
				$language = substr($language, 0, $dash);
			}
		}

		$bank = $ord >> 8;

		if (!isset($map[$bank][$language])) {
			$file = __DIR__ . DIRECTORY_SEPARATOR .
				'transliteration' . DIRECTORY_SEPARATOR . 'data' . DIRECTORY_SEPARATOR .
				sprintf('x%02x', $bank) . '.php';

			// The data file is expected to assign $base and, optionally, $variant. Defaults are set
			// first so that a missing file or a file that omits one of them still yields an array
			// that can be cached.
			$base = [];
			$variant = [];
			if (file_exists($file)) {
				include $file;
			}

			if ($language !== 'en' && isset($variant[$language])) {
				// Language-specific entries take precedence over the base table.
				$map[$bank][$language] = $variant[$language] + $base;
			} else {
				$map[$bank][$language] = $base;
			}
		}

		$index = $ord & 255;

		return isset($map[$bank][$language][$index]) ? $map[$bank][$language][$index] : $unknown;
	}

	/**
	 * Builds (once) the table that maps every possible byte to the number of continuation bytes
	 * that must follow it when it is used as the lead byte of a multi-byte sequence.
	 *
	 * @return array table keyed by single-byte strings; 0 means "cannot start a multi-byte sequence"
	 */
	private static function tailByteTable()
	{
		static $table;

		if ($table === null) {
			$table = [];
			for ($n = 0; $n < 256; $n++) {
				if ($n < 0xc0 || $n >= 0xfe) {
					$count = 0;
				} elseif ($n < 0xe0) {
					$count = 1;
				} elseif ($n < 0xf0) {
					$count = 2;
				} elseif ($n < 0xf8) {
					$count = 3;
				} elseif ($n < 0xfc) {
					$count = 4;
				} else {
					$count = 5;
				}
				$table[chr($n)] = $count;
			}
		}

		return $table;
	}
}