You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
126 lines
3.8 KiB
126 lines
3.8 KiB
<?php |
|
/** |
|
* @link http://www.yiiframework.com/ |
|
* @copyright Copyright (c) 2008 Yii Software LLC |
|
* @license http://www.yiiframework.com/license/ |
|
*/ |
|
|
|
namespace yii\build\controllers; |
|
|
|
use yii\console\Controller; |
|
use yii\helpers\Console; |
|
use yii\helpers\FileHelper; |
|
|
|
/** |
|
* Check files for broken UTF8 and non-printable characters. |
|
* |
|
* @author Carsten Brandt <mail@cebe.cc> |
|
*/ |
|
class Utf8Controller extends Controller
{
    /**
     * @var string the action that is run when no action is given on the command line.
     */
    public $defaultAction = 'check-guide';

    /**
     * @var array paths of the files whose header line has already been printed,
     * indexed by the path itself. Used to print each file name only once.
     */
    private $_foundFiles = [];


    /**
     * Check guide for non-printable characters that may break docs generation.
     *
     * Every finding is written to stdout, grouped by file. Files that cannot be read
     * are reported on stderr and skipped.
     *
     * @param string $directory the directory to check. If not specified, the default
     * guide directory will be checked. A path to a single file is accepted as well.
     */
    public function actionCheckGuide($directory = null)
    {
        if ($directory === null) {
            $directory = \dirname(\dirname(__DIR__)) . '/docs';
        }
        if (is_file($directory)) {
            $files = [$directory];
        } else {
            $files = FileHelper::findFiles($directory, [
                'only' => ['*.md'],
            ]);
        }

        foreach ($files as $file) {
            $content = file_get_contents($file);
            if ($content === false) {
                // Do not silently treat an unreadable file as "clean".
                $this->stderr("Unable to read file: $file\n", Console::FG_RED);
                continue;
            }
            $this->checkContent($content, $file);
        }
    }

    /**
     * Scans the content of one file and reports every problematic character.
     *
     * @param string $content the raw file content.
     * @param string $file the file path, used for reporting only.
     */
    private function checkContent($content, $file)
    {
        $line = 1;
        $pos = 0;
        foreach ($this->splitCharacters($content) as $c) {
            $ord = $this->unicodeOrd($c);

            // The position is counted in characters and restarts after every line feed.
            $pos++;
            if ($ord === 0x000A) {
                $line++;
                $pos = 0;
            }

            if ($ord === false) {
                $this->found('BROKEN UTF8', $c, $line, $pos, $file);
                continue;
            }

            // http://unicode-table.com/en/blocks/general-punctuation/
            if (0x2000 <= $ord && $ord <= 0x200F
                || 0x2028 <= $ord && $ord <= 0x202E
                || 0x205f <= $ord && $ord <= 0x206F
            ) {
                $this->found('UNSUPPORTED SPACE CHARACTER', $c, $line, $pos, $file);
                continue;
            }

            // C0 controls except tab and line feed, plus the complete C1 block
            // U+0080..U+009F (the upper bound is inclusive).
            if ($ord < 0x0020 && $ord !== 0x000A && $ord !== 0x0009
                || 0x0080 <= $ord && $ord <= 0x009F
            ) {
                $this->found('CONTROL CHARACTER', $c, $line, $pos, $file);
                continue;
            }
        }
    }

    /**
     * Splits a string into UTF-8 characters, keeping malformed byte runs as separate chunks.
     *
     * `preg_split()` with the `u` modifier rejects the whole subject when it contains any
     * malformed UTF-8, which would hide exactly the problem this command is looking for.
     * In that case the content is split with a byte-level pattern that matches well-formed
     * UTF-8 sequences; everything between two matches is returned as one chunk for which
     * [[unicodeOrd()]] returns `false`.
     *
     * @param string $content the raw content.
     * @return array list of non-empty chunks in their original order.
     */
    private function splitCharacters($content)
    {
        $chars = preg_split('//u', $content, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }

        // Well-formed UTF-8 byte sequences, matched byte-wise (no "u" modifier).
        $wellFormed = '/('
            . '[\x00-\x7F]'
            . '|[\xC2-\xDF][\x80-\xBF]'
            . '|\xE0[\xA0-\xBF][\x80-\xBF]'
            . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
            . '|\xED[\x80-\x9F][\x80-\xBF]'
            . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'
            . '|[\xF1-\xF3][\x80-\xBF]{3}'
            . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'
            . ')/';

        return preg_split($wellFormed, $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
    }

    /**
     * Prints one finding, preceded by the file name if this is the first finding for that file.
     *
     * @param string $what short description of the kind of problem.
     * @param string $char the offending character or malformed byte run.
     * @param int $line line number of the finding, starting at 1.
     * @param int $pos character position within the line.
     * @param string $file path of the file being checked.
     */
    private function found($what, $char, $line, $pos, $file)
    {
        if (!isset($this->_foundFiles[$file])) {
            $this->stdout("$file: \n", Console::BOLD);
            $this->_foundFiles[$file] = $file;
        }

        $ord = $this->unicodeOrd($char);
        if ($ord === false) {
            // There is no code point to look up; show the raw bytes instead of echoing
            // malformed data to the terminal.
            $this->stdout(" at $line:$pos FOUND $what: 0x" . bin2hex($char) . "\n");
            return;
        }

        $hexcode = str_pad(dechex($ord), 4, '0', STR_PAD_LEFT);

        $this->stdout(" at $line:$pos FOUND $what: 0x$hexcode '$char' http://unicode-table.com/en/$hexcode/\n");
    }

    /**
     * Equivalent for ord() just for unicode.
     *
     * Decodes the first UTF-8 encoded character of the given string. Truncated sequences,
     * invalid continuation bytes, overlong encodings, surrogates and values above U+10FFFF
     * are rejected.
     *
     * http://stackoverflow.com/a/10333324/1106908
     *
     * @param string $c the string whose first character should be decoded.
     * @return bool|int the code point, or `false` if the string does not start with a
     * well-formed UTF-8 sequence.
     */
    private function unicodeOrd($c)
    {
        $length = \strlen($c);
        if ($length === 0) {
            return false;
        }

        $h = \ord($c[0]);
        if ($h <= 0x7F) {
            return $h;
        }
        // 0x80..0xC1 are continuation bytes or overlong lead bytes, 0xF5..0xFF are never valid.
        if ($h < 0xC2 || $h > 0xF4) {
            return false;
        }

        if ($h <= 0xDF) {
            $extra = 1;
            $code = $h & 0x1F;
            $min = 0x80;
        } elseif ($h <= 0xEF) {
            $extra = 2;
            $code = $h & 0x0F;
            $min = 0x800;
        } else {
            $extra = 3;
            $code = $h & 0x07;
            $min = 0x10000;
        }

        if ($length <= $extra) {
            // truncated sequence
            return false;
        }
        for ($i = 1; $i <= $extra; $i++) {
            $byte = \ord($c[$i]);
            if (($byte & 0xC0) !== 0x80) {
                return false;
            }
            $code = ($code << 6) | ($byte & 0x3F);
        }

        if ($code < $min || $code > 0x10FFFF || (0xD800 <= $code && $code <= 0xDFFF)) {
            // overlong encoding, out of range, or surrogate
            return false;
        }

        return $code;
    }
}
|
|
|