/ Published in: PHP
Because htmlentities just doesn't cut it.
I recently needed this to work with third-party software and ended up having to make it pretty robust, so I thought I'd share!
I recently needed this to work with third-party software and ended up having to make it pretty robust, so I thought I'd share!
Expand |
Embed | Plain Text
Copy this code and paste it in your HTML
<?php
/**
 * unicode_ord
 *
 * Returns the Unicode code point of the UTF-8 character that starts at byte
 * offset $i of $c.
 *
 * On success $i is advanced by the number of *extra* bytes the character
 * occupies (0 for ASCII, up to 3 for a four-byte sequence), so a plain
 * `for (...; $i++)` loop lands on the first byte of the next character.
 * On failure $i is left untouched, which lets the caller skip the bad byte.
 *
 * @param string $c The source string (UTF-8 bytes)
 * @param int    $i The byte index to read from (passed by reference for use in a loop)
 * @return int|false The code point, or false when the offset is out of range
 *                   or the bytes at $i are not a well-formed UTF-8 sequence
 * @author kerry at shetline dot com
 * @author Dom Hastings - modified to suit my needs
 * @see http://www.php.net/manual/en/function.ord.php#78032
 */
function unicode_ord(&$c, &$i = 0)
{
    // get the string length (in bytes)
    $l = strlen($c);

    // copy the offset so $i is only touched once we know the sequence is good
    $index = $i;

    // check it's a valid offset
    if ($index < 0 || $index >= $l) {
        return false;
    }

    // lead byte
    $o = ord($c[$index]);

    // plain ascii
    if ($o <= 0x7F) {
        return $o;
    }

    // 0x80-0xBF are stray continuation bytes, 0xC0/0xC1 can only start an
    // overlong form and nothing above 0xF4 is a legal lead byte
    if ($o < 0xC2 || $o > 0xF4) {
        return false;
    }

    // work out how many continuation bytes follow, the payload bits of the
    // lead byte and the smallest code point this length may encode
    if ($o <= 0xDF) {
        $extra = 1;
        $value = $o & 0x1F;
        $min = 0x80;
    } elseif ($o <= 0xEF) {
        $extra = 2;
        $value = $o & 0x0F;
        $min = 0x800;
    } else {
        $extra = 3;
        $value = $o & 0x07;
        $min = 0x10000;
    }

    // truncated sequence at the end of the string
    if ($index + $extra >= $l) {
        return false;
    }

    // fold in the continuation bytes, each must look like 10xxxxxx
    for ($k = 1; $k <= $extra; $k++) {
        $b = ord($c[$index + $k]);

        if (($b & 0xC0) !== 0x80) {
            return false;
        }

        $value = ($value << 6) | ($b & 0x3F);
    }

    // reject overlong encodings, UTF-16 surrogates and values past U+10FFFF
    if ($value < $min || $value > 0x10FFFF || ($value >= 0xD800 && $value <= 0xDFFF)) {
        return false;
    }

    // consume the continuation bytes
    $i += $extra;

    return $value;
}

/**
 * unicode_chr
 *
 * Returns the UTF-8 encoding of a Unicode code point.
 *
 * The argument is taken by reference for backwards compatibility only; it is
 * never modified.
 *
 * @param int|string $c The code point (an integer or a numeric string)
 * @return string|false The UTF-8 bytes, or false if $c is not numeric, is
 *                      negative, is a surrogate (U+D800-U+DFFF) or is above U+10FFFF
 * @author Miguel Perez
 * @see http://www.php.net/manual/en/function.chr.php#77911
 */
function unicode_chr(&$c)
{
    if (!is_numeric($c)) {
        return false;
    }

    // work on a copy so the caller's variable is left alone
    $cp = (int) $c;

    if ($cp < 0 || $cp > 0x10FFFF || ($cp >= 0xD800 && $cp <= 0xDFFF)) {
        return false;
    }

    // one byte
    if ($cp <= 0x7F) {
        return chr($cp);
    }

    // two bytes
    if ($cp <= 0x7FF) {
        return chr(0xC0 | ($cp >> 6))
            . chr(0x80 | ($cp & 0x3F));
    }

    // three bytes
    if ($cp <= 0xFFFF) {
        return chr(0xE0 | ($cp >> 12))
            . chr(0x80 | (($cp >> 6) & 0x3F))
            . chr(0x80 | ($cp & 0x3F));
    }

    // four bytes
    return chr(0xF0 | ($cp >> 18))
        . chr(0x80 | (($cp >> 12) & 0x3F))
        . chr(0x80 | (($cp >> 6) & 0x3F))
        . chr(0x80 | ($cp & 0x3F));
}

/**
 * xmlentities
 *
 * Makes the specified UTF-8 string XML-safe: the five predefined XML entities
 * are used for & < > ' and ", printable ASCII and ASCII whitespace are copied
 * as-is and every other character becomes a numeric character reference.
 * NUL bytes and bytes that are not valid UTF-8 are dropped.
 *
 * NOTE: control characters 1-8 and 14-31 are emitted as references and 11/12
 * are copied verbatim, exactly as before; none of these are legal in XML 1.0
 * documents, so strip them first if the consumer is strict.
 *
 * @param string $s   The source string
 * @param bool   $hex Whether to make hexadecimal references (as opposed to decimal)
 * @return string The XML-safe result
 * @author Dom Hastings
 * @dependencies unicode_ord()
 * @see http://www.w3.org/TR/REC-xml/#sec-predefined-ent
 */
function xmlentities($s, $hex = true)
{
    // if the string is empty just return it
    if ($s === null || $s === '') {
        return $s;
    }

    $s = (string) $s;

    // the predefined entities, keyed by code point
    $predefined = array(
        38 => '&amp;',
        60 => '&lt;',
        62 => '&gt;',
        39 => '&apos;',
        34 => '&quot;',
    );

    // create the return string
    $r = '';

    // get the length
    $l = strlen($s);

    // iterate the string, unicode_ord() moves $i past any continuation bytes
    for ($i = 0; $i < $l; $i++) {
        // get the value of the character
        $o = unicode_ord($s, $i);

        // invalid byte or NUL, nothing sensible to emit
        if ($o === false || $o === 0) {
            continue;
        }

        if (isset($predefined[$o])) {
            // & < > ' "
            $r .= $predefined[$o];
        } elseif (($o >= 9 && $o <= 13) || ($o >= 32 && $o <= 126)) {
            // \t \n <vertical tab> <form feed> \r and printable ascii
            $r .= chr($o);
        } elseif ($hex) {
            // anything else, add it as a hexadecimal reference
            $r .= '&#x' . dechex($o) . ';';
        } else {
            // ... or as a decimal reference
            $r .= '&#' . $o . ';';
        }
    }

    return $r;
}

/**
 * xmlentity_decode
 *
 * Converts XML entity encoded data back to a UTF-8 string. Handles decimal
 * (&#233;) and hexadecimal (&#xe9;) character references, the five predefined
 * XML entities and any additional named entities supplied by the caller.
 *
 * An entity name runs from the ampersand to the next semi-colon, or to the
 * end of the string if there is none.
 *
 * @param string     $s        The XML encoded string
 * @param array|null $entities Additional entities to decode, name => replacement (optional)
 * @return string
 * @throws Exception If $entities is not an array, or an entity is unknown,
 *                   malformed or refers to an invalid code point
 * @dependencies unicode_chr()
 * @author Dom Hastings
 */
function xmlentity_decode($s, $entities = array())
{
    // if the string is empty, just return it
    if ($s === null || $s === '') {
        return $s;
    }

    // check that entities is an array (null is treated as "none")
    if ($entities === null) {
        $entities = array();
    }

    if (!is_array($entities)) {
        throw new Exception('xmlentity_decode expects argument 2 to be array.');
    }

    $s = (string) $s;

    // initialise vars
    $r = '';
    $l = strlen($s);

    // merge the entities with the defaults (amp, lt, gt, apos and quot MUST
    // take precedence, so they go last)
    $entities = array_merge($entities, array(
        'amp'  => '&',
        'lt'   => '<',
        'gt'   => '>',
        'apos' => '\'',
        'quot' => '"',
    ));

    // loop through the string
    for ($i = 0; $i < $l; $i++) {
        // if it's just a regular char, append it
        if ($s[$i] !== '&') {
            $r .= $s[$i];
            continue;
        }

        // it looks like an entity, find the terminating semi-colon
        $end = strpos($s, ';', $i + 1);

        if ($end === false) {
            $end = $l;
        }

        // the entity name, without the & and ;
        $e = substr($s, $i + 1, $end - $i - 1);

        // update the index, the loop increment steps over the semi-colon
        $i = $end;

        // if the first char is a #, it's a numeric entity
        if ($e !== '' && $e[0] === '#') {
            $code = null;

            if (preg_match('/\A#x([0-9a-fA-F]+)\z/', $e, $m)) {
                // hexadecimal reference
                $code = hexdec($m[1]);
            } elseif (preg_match('/\A#([0-9]+)\z/', $e, $m)) {
                // decimal reference
                $code = (int) $m[1];
            }

            // range check here as hexdec() returns a float for huge input
            $char = ($code === null || $code > 0x10FFFF) ? false : unicode_chr($code);

            if ($char === false) {
                throw new Exception('Unknown entity "'.$e.'"');
            }

            $r .= $char;
        } elseif (isset($entities[$e])) {
            // it's in our array (which it should be), append the character
            $r .= $entities[$e];
        } else {
            // we don't know what to do with this
            throw new Exception('Unknown entity "'.$e.'"');
        }
    }

    return $r;
}
URL: http://www.dom111.co.uk/blog/coding/xml-entities-in-php/224