|
2010-06-02 20:24:131821
|
converts a UTF8-string into HTML entities |
|
호석
|
|
| 일반 | |
|
silverbeat -eat- gmx -hot- at
10-Mar-2010 05:42
When using UTF-8 as a charset, htmlentities will only convert 1-byte and 2-byte characters. Use this function if you also want to convert 3-byte and 4-byte characters:
<?php
// Converts a UTF-8 string into HTML, replacing non-ASCII characters with entities.
// 2-byte characters are passed through htmlentities(); 3-byte and 4-byte
// characters are emitted as decimal numeric character references
// (for example the euro sign becomes "&#8364;").
// Stray continuation bytes and invalid lead bytes are dropped, and a multi-byte
// sequence that is cut off at the end of the input is dropped as well.
// - $utf8: the UTF-8 string to convert
// - $encodeTags: boolean. TRUE also escapes ASCII markup characters, e.g. "<" becomes "&lt;"
// - return: returns the converted HTML string
function utf8tohtml($utf8, $encodeTags) {
    $result = '';
    // Byte length, computed once instead of on every loop iteration.
    $length = strlen($utf8);
    for ($i = 0; $i < $length; $i++) {
        $char = $utf8[$i];
        $ascii = ord($char);
        if ($ascii < 128) {
            // one-byte (ASCII) character: escaped only on request
            $result .= ($encodeTags) ? htmlentities($char) : $char;
        } else if ($ascii < 192) {
            // continuation byte without a preceding start byte: skip it
        } else if ($ascii < 224) {
            // two-byte character: htmlentities() knows named entities for these
            $result .= htmlentities(substr($utf8, $i, 2), ENT_QUOTES, 'UTF-8');
            $i++;
        } else if ($ascii < 240) {
            // three-byte character
            if ($i + 2 >= $length) {
                // sequence is truncated at the end of the input: nothing valid left
                break;
            }
            $ascii1 = ord($utf8[$i + 1]);
            $ascii2 = ord($utf8[$i + 2]);
            // 4 payload bits from the lead byte, 6 from each continuation byte
            $unicode = (0x0F & $ascii) * 4096 +
                       (0x3F & $ascii1) * 64 +
                       (0x3F & $ascii2);
            $result .= '&#' . $unicode . ';';
            $i += 2;
        } else if ($ascii < 248) {
            // four-byte character
            if ($i + 3 >= $length) {
                // sequence is truncated at the end of the input: nothing valid left
                break;
            }
            $ascii1 = ord($utf8[$i + 1]);
            $ascii2 = ord($utf8[$i + 2]);
            $ascii3 = ord($utf8[$i + 3]);
            // 3 payload bits from the lead byte, 6 from each continuation byte
            $unicode = (0x07 & $ascii) * 262144 +
                       (0x3F & $ascii1) * 4096 +
                       (0x3F & $ascii2) * 64 +
                       (0x3F & $ascii3);
            $result .= '&#' . $unicode . ';';
            $i += 3;
        }
        // bytes >= 248 are never valid in UTF-8 and are dropped
    }
    return $result;
}
// Usage example: $anyUTF8string is a placeholder for the caller's own UTF-8 input.
echo utf8tohtml($anyUTF8string, true);
?>
regards, silverbeat
|
|
|
|
|