*/
class Utf8Controller extends Controller
{
    public $defaultAction = 'check-guide';

    /**
     * @var array files for which a header line has already been printed, indexed by file name.
     */
    private $_foundFiles = [];

    /**
     * Check guide for non-printable characters that may break docs generation.
     *
     * Every `*.md` file below the directory (or the single file given) is scanned
     * character by character; broken UTF-8 sequences, unsupported space characters
     * and control characters are reported with their line and column.
     *
     * @param string $directory the directory to check. If not specified, the default
     * guide directory will be checked.
     */
    public function actionCheckGuide($directory = null)
    {
        if ($directory === null) {
            $directory = dirname(dirname(__DIR__)) . '/docs';
        }

        if (is_file($directory)) {
            $files = [$directory];
        } else {
            $files = FileHelper::findFiles($directory, [
                'only' => ['*.md'],
            ]);
        }

        foreach ($files as $file) {
            $this->checkFile($file);
        }
    }

    /**
     * Scans a single file and reports every problematic character in it.
     *
     * @param string $file path of the file to scan.
     */
    private function checkFile($file)
    {
        $content = file_get_contents($file);
        if ($content === false) {
            $this->stdout("$file: UNABLE TO READ FILE\n", Console::BOLD);
            return;
        }

        $line = 1;
        $pos = 0;
        foreach ($this->splitCharacters($content) as $c) {
            $ord = $this->unicodeOrd($c);
            $pos++;
            if ($ord === 0x000A) {
                // A line feed starts a new line and is never reported itself.
                $line++;
                $pos = 0;
                continue;
            }

            $problem = $this->classify($ord);
            if ($problem !== null) {
                $this->found($problem, $c, $line, $pos, $file);
            }
        }
    }

    /**
     * Decides whether a code point has to be reported.
     *
     * @param int|false $ord code point as returned by [[unicodeOrd()]], `false` for a broken sequence.
     * @return string|null label of the problem, or `null` if the character is fine.
     */
    private function classify($ord)
    {
        if ($ord === false) {
            return 'BROKEN UTF8';
        }

        // http://unicode-table.com/en/blocks/general-punctuation/
        if ((0x2000 <= $ord && $ord <= 0x200F)
            || (0x2028 <= $ord && $ord <= 0x202E)
            || (0x205f <= $ord && $ord <= 0x206F)
        ) {
            return 'UNSUPPORTED SPACE CHARACTER';
        }

        // C0 controls except line feed and tab, plus the complete C1 range
        // U+0080..U+009F (the upper bound is inclusive).
        if (($ord < 0x0020 && $ord !== 0x000A && $ord !== 0x0009)
            || (0x0080 <= $ord && $ord <= 0x009F)
        ) {
            return 'CONTROL CHARARCTER';
        }

        // Reporting every non-ASCII character is intentionally disabled:
        // if ($ord > 0x009F) {
        //     $this->found("NON ASCII CHARARCTER", $c, $line, $pos, $file);
        //     continue;
        // }

        return null;
    }

    /**
     * Splits a string into its UTF-8 characters.
     *
     * `preg_split()` with the `u` modifier returns `false` as soon as the subject
     * contains malformed UTF-8, which would hide exactly the problems this command
     * looks for. In that case the string is split on byte level instead:
     * structurally complete sequences stay together and every other byte becomes
     * an element of its own, so that [[unicodeOrd()]] can flag it.
     *
     * @param string $content the text to split.
     * @return string[] the characters (or stray bytes) in their original order.
     */
    private function splitCharacters($content)
    {
        // -1 means "no limit"; passing null here is deprecated as of PHP 8.1.
        $chars = preg_split('//u', $content, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }

        $pattern = '/[\x00-\x7F]'
            . '|[\xC2-\xDF][\x80-\xBF]'
            . '|[\xE0-\xEF][\x80-\xBF]{2}'
            . '|[\xF0-\xF4][\x80-\xBF]{3}'
            . '|[\x80-\xFF]/';
        if (preg_match_all($pattern, $content, $matches) === false) {
            // Last resort: single bytes; multi-byte characters get reported as broken.
            return str_split($content);
        }

        return $matches[0];
    }

    /**
     * Prints one finding, preceded by the file name the first time a file is mentioned.
     *
     * @param string $what label describing the problem.
     * @param string $char the offending character (or raw bytes for a broken sequence).
     * @param int $line line number, starting at 1.
     * @param int $pos position of the character within the line.
     * @param string $file path of the file being checked.
     */
    private function found($what, $char, $line, $pos, $file)
    {
        if (!isset($this->_foundFiles[$file])) {
            $this->stdout("$file: \n", Console::BOLD);
            $this->_foundFiles[$file] = $file;
        }

        $ord = $this->unicodeOrd($char);
        // A broken sequence has no code point, show its raw bytes instead.
        $hexcode = $ord === false ? bin2hex($char) : dechex($ord);
        $hexcode = str_pad($hexcode, 4, '0', STR_PAD_LEFT);

        $this->stdout(" at $line:$pos FOUND $what: 0x$hexcode '$char' http://unicode-table.com/en/$hexcode/\n");
    }

    /**
     * Equivalent of ord() for UTF-8: returns the code point of the first character of a string.
     *
     * Based on http://stackoverflow.com/a/10333324/1106908, extended with validation:
     * truncated sequences, invalid continuation bytes, overlong encodings, surrogates
     * and values above U+10FFFF are rejected.
     *
     * @param string $c the string whose first character is decoded.
     * @return int|false the code point, or `false` if the string is empty or does
     * not start with a well-formed UTF-8 sequence.
     */
    private function unicodeOrd($c)
    {
        $length = strlen($c);
        if ($length === 0) {
            return false;
        }

        $h = ord($c[0]);
        if ($h <= 0x7F) {
            return $h;
        }
        if ($h < 0xC2) {
            // Stray continuation byte or overlong two-byte lead (0xC0, 0xC1).
            return false;
        }

        if ($h <= 0xDF) {
            $size = 2;
            $code = $h & 0x1F;
            $min = 0x80;
        } elseif ($h <= 0xEF) {
            $size = 3;
            $code = $h & 0x0F;
            $min = 0x800;
        } elseif ($h <= 0xF4) {
            $size = 4;
            $code = $h & 0x07;
            $min = 0x10000;
        } else {
            return false;
        }

        if ($length < $size) {
            return false;
        }

        for ($i = 1; $i < $size; $i++) {
            $byte = ord($c[$i]);
            if (($byte & 0xC0) !== 0x80) {
                return false;
            }
            $code = ($code << 6) | ($byte & 0x3F);
        }

        // Reject overlong encodings, UTF-16 surrogates and values beyond Unicode.
        if ($code < $min || $code > 0x10FFFF || (0xD800 <= $code && $code <= 0xDFFF)) {
            return false;
        }

        return $code;
    }
}