= 4.3.0
	if (String::hasMBString()) {
		// mbstring routines are available
		define('ENABLE_MBSTRING', 1);

		// Set up required ini settings for mbstring
		ini_set('mbstring.internal_encoding', $clientCharset);
		if ($clientCharset == 'utf-8') {
			ini_set('mbstring.substitute_character', '12307');
		}

		// FIXME Do any other mbstring settings need to be set?
	}

	// Define modifier to be used in regexp_* routines
	define('PCRE_UTF8', $PCRE_UTF8);
}

/**
 * Report whether the mbstring functions checked below are all available.
 * The list includes mb_strtolower, mb_strtoupper and mb_substr_count,
 * which currently require PHP >= 4.3.0.
 * @return boolean true only if every listed function exists
 */
function hasMBString() {
	$requiredFunctions = array(
		'mb_strlen',
		'mb_strpos',
		'mb_strrpos',
		'mb_substr',
		'mb_strtolower',
		'mb_strtoupper',
		'mb_substr_count',
		'mb_send_mail'
	);

	// A single missing function means mbstring cannot be relied upon.
	foreach ($requiredFunctions as $functionName) {
		if (!function_exists($functionName)) {
			return false;
		}
	}
	return true;
}

/**
 * Report whether the PCRE "u" (UTF-8) pattern modifier is usable.
 * @return boolean
 */
function hasPCREUTF8() {
	// The PCRE_UTF8 modifier is only supported on PHP >= 4.1.0 (*nix) or PHP >= 4.2.3 (win32)
	// Evil check: run a trivial pattern with the modifier and see whether it
	// matches; the warning raised when it is unsupported is suppressed.
	return @preg_match('//u', '') ? true : false;
}

//
// Wrappers for basic string manipulation routines.
// See the php.net documentation for usage.
//

/**
 * Return the length of a string.
 * Uses mb_strlen() when ENABLE_MBSTRING is defined, the native strlen() otherwise.
 * @param $string string
 * @return int
 */
function strlen($string) {
	if (defined('ENABLE_MBSTRING')) {
		return mb_strlen($string);
	} else {
		return strlen($string);
	}
}

/**
 * Find the position of the first occurrence of $needle in $haystack.
 * Uses mb_strpos() when ENABLE_MBSTRING is defined, the native strpos() otherwise.
 * @param $haystack string
 * @param $needle string
 * @param $offset int position at which the search starts
 * @return int|boolean result of the underlying strpos routine
 */
function strpos($haystack, $needle, $offset = 0) {
	if (defined('ENABLE_MBSTRING')) {
		return mb_strpos($haystack, $needle, $offset);
	} else {
		return strpos($haystack, $needle, $offset);
	}
}

/**
 * Find the position of the last occurrence of $needle in $haystack.
 * Uses mb_strrpos() when ENABLE_MBSTRING is defined, the native strrpos() otherwise.
 * @param $haystack string
 * @param $needle string
 * @return int|boolean result of the underlying strrpos routine
 */
function strrpos($haystack, $needle) {
	if (defined('ENABLE_MBSTRING')) {
		return mb_strrpos($haystack, $needle);
	} else {
		return strrpos($haystack, $needle);
	}
}

/**
 * Return part of a string.
 * Uses mb_substr() when ENABLE_MBSTRING is defined, the native substr() otherwise.
 * @param $string string
 * @param $start int
 * @param $length int optional; only forwarded when it is set (not null)
 * @return string result of the underlying substr routine
 */
function substr($string, $start, $length = null) {
	if (defined('ENABLE_MBSTRING')) {
		$substr = 'mb_substr';
	} else {
		$substr = 'substr';
	}

	// Omit the length argument entirely when the caller did not supply one,
	// so the underlying routine applies its own "to end of string" default.
	if (isset($length)) {
		return $substr($string, $start, $length);
	} else {
		return $substr($string, $start);
	}
}

/**
 * Upper-case the first character of a string, going through the
 * String::strtoupper() and String::substr() wrappers.
 * @param $string string
 * @return string
 */
function ucfirst($string) {
	$fc = String::strtoupper(String::substr($string, 0, 1));
	return $fc . String::substr($string, 1);
}

/**
 * Convert a string to lower case.
 * Uses mb_strtolower() when ENABLE_MBSTRING is defined, the native strtolower() otherwise.
 * @param $string string
 * @return string
 */
function strtolower($string) {
	if (defined('ENABLE_MBSTRING')) {
		return mb_strtolower($string); // Requires PHP >= 4.3.0
	} else {
		return strtolower($string);
	}
}

/**
 * Convert a string to upper case.
 * Uses mb_strtoupper() when ENABLE_MBSTRING is defined, the native strtoupper() otherwise.
 * @param $string string
 * @return string
 */
function strtoupper($string) {
	if (defined('ENABLE_MBSTRING')) {
		return mb_strtoupper($string); // Requires PHP >= 4.3.0
	} else {
		// The fallback must upper-case as well; it previously called
		// strtolower(), so results depended on whether mbstring was present.
		return strtoupper($string);
	}
}

/**
 * Count the occurrences of $needle in $haystack.
 * Uses mb_substr_count() when ENABLE_MBSTRING is defined, the native substr_count() otherwise.
 * @param $haystack string
 * @param $needle string
 * @return int
 */
function substr_count($haystack, $needle) {
	if (defined('ENABLE_MBSTRING')) {
		return mb_substr_count($haystack, $needle); // Requires PHP >= 4.3.0
	} else {
		return substr_count($haystack, $needle);
	}
}

/**
 * Encode a string for use in a MIME header.
 * When ENABLE_MBSTRING is defined the string is passed to
 * mb_encode_mimeheader() with the mbstring.internal_encoding ini value,
 * the 'B' transfer encoding and MAIL_EOL as line terminator; otherwise it
 * is returned unchanged.
 * @param $string string
 * @return string
 */
function encode_mime_header($string) {
	if (defined('ENABLE_MBSTRING')) {
		return mb_encode_mimeheader($string, ini_get('mbstring.internal_encoding'), 'B', MAIL_EOL);
	} else {
		return $string;
	}
}

/**
 * Send an e-mail through the native mail() function.
 * @param $to string
 * @param $subject string
 * @param $message string
 * @param $additional_headers string
 * @param $additional_parameters string only forwarded when not empty
 * @return boolean result of mail()
 */
function mail($to, $subject, $message, $additional_headers = '', $additional_parameters = '') {
	// Cannot use mb_send_mail as it base64 encodes the whole body of the email,
	// making it useless for multipart emails
	if (empty($additional_parameters)) {
		return mail($to, $subject, $message, $additional_headers);
	} else {
		return mail($to, $subject, $message, $additional_headers, $additional_parameters);
	}
}

//
// Wrappers for
// PCRE-compatible regular expression routines.
// See the php.net documentation for usage.
//

/**
 * Quote regular expression characters (wrapper around preg_quote()).
 * @param $string string
 * @param $delimiter string pattern delimiter that is escaped as well
 * @return string
 */
function regexp_quote($string, $delimiter = '/') {
	return preg_quote($string, $delimiter);
}

/**
 * preg_grep() with the PCRE_UTF8 modifier constant appended to the pattern.
 * @param $pattern string delimited pattern; modifiers are appended to its end
 * @param $input array
 * @return array
 */
function regexp_grep($pattern, $input) {
	return preg_grep($pattern . PCRE_UTF8, $input);
}

/**
 * preg_match() with the PCRE_UTF8 modifier constant appended to the pattern.
 * @param $pattern string
 * @param $subject string
 * @return int|boolean result of preg_match()
 */
function regexp_match($pattern, $subject) {
	return preg_match($pattern . PCRE_UTF8, $subject);
}

/**
 * Same as regexp_match(), but also fills $matches.
 * @param $pattern string
 * @param $subject string
 * @param $matches array receives the matches (by reference)
 * @return int|boolean result of preg_match()
 */
function regexp_match_get($pattern, $subject, &$matches) {
	// NOTE: This function was created since PHP < 5.x does not support optional reference parameters
	return preg_match($pattern . PCRE_UTF8, $subject, $matches);
}

/**
 * preg_match_all() with the PCRE_UTF8 modifier constant appended to the pattern.
 * @param $pattern string
 * @param $subject string
 * @param $matches array receives the matches (by reference)
 * @return int|boolean result of preg_match_all()
 */
function regexp_match_all($pattern, $subject, &$matches) {
	return preg_match_all($pattern . PCRE_UTF8, $subject, $matches);
}

/**
 * preg_replace() with the PCRE_UTF8 modifier constant appended to the pattern.
 * @param $pattern string
 * @param $replacement string
 * @param $subject string
 * @param $limit int maximum number of replacements, -1 for no limit
 * @return string result of preg_replace()
 */
function regexp_replace($pattern, $replacement, $subject, $limit = -1) {
	return preg_replace($pattern . PCRE_UTF8, $replacement, $subject, $limit);
}

/**
 * preg_replace_callback() with the PCRE_UTF8 modifier constant appended to the pattern.
 * @param $pattern string
 * @param $callback callback
 * @param $subject string
 * @param $limit int maximum number of replacements, -1 for no limit
 * @return string result of preg_replace_callback()
 */
function regexp_replace_callback($pattern, $callback, $subject, $limit = -1) {
	return preg_replace_callback($pattern . PCRE_UTF8, $callback, $subject, $limit);
}

/**
 * preg_split() with the PCRE_UTF8 modifier constant appended to the pattern.
 * @param $pattern string
 * @param $subject string
 * @param $limit int maximum number of pieces, -1 for no limit
 * @return array result of preg_split()
 */
function regexp_split($pattern, $subject, $limit = -1) {
	return preg_split($pattern . PCRE_UTF8, $subject, $limit);
}

/**
 * Determine the MIME type of a file.
 * Tries, in order: the native mime_content_type() function, the fileinfo
 * extension (using the [finfo] mime_database_path configuration value),
 * and finally the external "file -bi" command.
 * @param $filename string path of the file to inspect
 * @return string|boolean the MIME type; whatever the native function or
 *  finfo_file() reports on failure is passed through
 */
function mime_content_type($filename) {
	static $fi;

	if (function_exists('mime_content_type')) {
		return mime_content_type($filename);
	}

	$result = null;
	if (function_exists('finfo_open')) {
		// Open the fileinfo resource once and reuse it on later calls.
		if (!isset($fi)) {
			$fi = finfo_open(FILEINFO_MIME, Config::getVar('finfo', 'mime_database_path'));
		}
		if ($fi !== false) {
			$result = finfo_file($fi, $filename);
			if ($result === false) {
				return false;
			}
		}
	}

	if ($result === null) {
		// Neither mechanism was usable: ask the external "file" utility.
		$f = escapeshellarg($filename);
		$result = trim(`file -bi $f`);
	}

	// Make sure we just return the mime type. Both FILEINFO_MIME and
	// "file -bi" may append "; charset=..." to the type, so the suffix is
	// stripped for both sources (previously only for the shell fallback).
	if (($i = strpos($result, ';')) !== false) {
		$result = trim(substr($result, 0, $i));
	}
	return $result;
}

/**
 * Strip unsafe HTML from the input text. Covers XSS attacks like scripts,
 * onclick(...) attributes, javascript: urls, and special characters.
* @param $input string input string * @return string */ function stripUnsafeHtml($input) { // Parts of this implementation were taken from Horde: // see http://cvs.horde.org/co.php/framework/MIME/MIME/Viewer/html.php. $allowedHtml = Config::getVar('security', 'allowed_html'); if ($allowedHtml == '') $allowedHtml = '

    1. '; $html = strip_tags($input, $allowedHtml); // Change space entities to space characters $html = preg_replace('/&#(x0*20|0*32);?/i', ' ', $html); // Remove non-printable characters $html = preg_replace('/&#x?0*([9A-D]|1[0-3]);/i', ' ', $html); $html = preg_replace('/&#x?0*[9A-D]([^0-9A-F]|$)/i', ' \\1', $html); $html = preg_replace('/�*(9|1[0-3])([^0-9]|$)/i', ' \\2', $html); // Remove overly long numeric entities $html = preg_replace('/&#x?0*[0-9A-F]{6,};?/i', ' ', $html); /* Get all attribute="javascript:foo()" tags. This is * essentially the regex /(=|url\()("?)[^>]* script:/ but * expanded to catch camouflage with spaces and entities. */ $preg = '/((�*61;?|�*3D;?|=)|' . '((u|�*85;?|�*55;?|�*117;?|�*75;?)\s*' . '(r|�*82;?|�*52;?|�*114;?|�*72;?)\s*' . '(l|�*76;?|�*4c;?|�*108;?|�*6c;?)\s*' . '(\()))\s*' . '(�*34;?|�*22;?|"|�*39;?|�*27;?|\')?' . '[^>]*\s*' . '(s|�*83;?|�*53;?|�*115;?|�*73;?)\s*' . '(c|�*67;?|�*43;?|�*99;?|�*63;?)\s*' . '(r|�*82;?|�*52;?|�*114;?|�*72;?)\s*' . '(i|�*73;?|�*49;?|�*105;?|�*69;?)\s*' . '(p|�*80;?|�*50;?|�*112;?|�*70;?)\s*' . '(t|�*84;?|�*54;?|�*116;?|�*74;?)\s*' . '(:|�*58;?|�*3a;?)/i'; $html = preg_replace($preg, '\1\8OCSCleaned', $html); /* Get all on="bar()". NEVER allow these. */ $html = preg_replace('/([\s"\']+' . '(o|�*79;?|�*4f;?|�*111;?|�*6f;?)' . '(n|�*78;?|�*4e;?|�*110;?|�*6e;?)' . 
'\w+)\s*=/i', '\1OCSCleaned=', $html);

	$pattern = array(
		'|<([^>]*)&{.*}([^>]*)>|',
		'|<([^>]*)mocha:([^>]*)>|i',
		'|<([^>]*)binding:([^>]*)>|i'
	);
	$replace = array('<&{;}\3>', '<\1OCSCleaned:\2>', '<\1OCSCleaned:\2>');
	$html = preg_replace($pattern, $replace, $html);

	return $html;
}

/**
 * Detect whether a string contains non-ascii multibyte sequences in the UTF-8 range
 * Does not require any multibyte PHP libraries
 * @param $str string input string
 * @return int result of preg_match(): 1 if at least one well-formed
 *  multibyte UTF-8 sequence is found, 0 otherwise
 */
function isUTF8 ($str) {
	// From http://w3.org/International/questions/qa-forms-utf-8.html
	// The pattern uses the "x" modifier: each "#" comment runs to the end of
	// its line, so the alternatives must stay on separate lines.
	return preg_match('%(?:
			[\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
			|\xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
			|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
			|\xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
			|\xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
			|[\xF1-\xF3][\x80-\xBF]{3}         # planes 4-15
			|\xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
			)+%xs', $str);
}

/**
 * Returns the UTF-8 string corresponding to the unicode value
 * Does not require any multibyte PHP libraries
 * (from php.net, courtesy - romans@void.lv)
 * @param $num int unicode code point
 * @return string the encoded character (1 to 4 bytes), or an empty string
 *  when $num is 2097152 or greater
 */
function code2utf ($num) {
	// One byte: 0xxxxxxx
	if ($num < 128) return chr($num);
	// Two bytes: 110xxxxx 10xxxxxx
	if ($num < 2048) return chr(($num >> 6) + 192) . chr(($num & 63) + 128);
	// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	if ($num < 65536) return chr(($num >> 12) + 224) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	if ($num < 2097152) return chr(($num >> 18) + 240) . chr((($num >> 12) & 63) + 128) . chr((($num >> 6) & 63) + 128) . chr(($num & 63) + 128);
	return '';
}

/**
 * Convert UTF-8 encoded characters in a string to escaped HTML entities
 * This is a helper function for transcoding into HTML
 * Only 2-byte and 3-byte sequences are converted to "&#NNN;" entities;
 * every other byte (ASCII, 4-byte sequences) is copied through unchanged.
 * @param $str string input string
 * @return string
 */
function utf2html ($str) {
	$ret = "";
	$max = strlen($str);
	$last = 0;  // keeps the index of the last regular character
	for ($i=0; $i<$max; $i++) {
		// Square-bracket offsets: the curly-brace form ($str{$i}) was
		// deprecated in PHP 7.4 and no longer parses in PHP 8.
		$c = $str[$i];
		$c1 = ord($c);
		if ($c1>>5 == 6) {  // 110x xxxx, 110 prefix for 2 bytes unicode
			$ret .= substr($str, $last, $i-$last); // append all the regular characters we've passed
			$c1 &= 31; // remove the 3 bit two bytes prefix
			$c2 = ord($str[++$i]); // the next byte
			$c2 &= 63;  // remove the 2 bit trailing byte prefix
			$c2 |= (($c1 & 3) << 6); // last 2 bits of c1 become first 2 of c2
			$c1 >>= 2; // c1 shifts 2 to the right
			$ret .= "&#" . ($c1 * 0x100 + $c2) . ";"; // this is the fastest string concatenation
			$last = $i+1;
		}
		elseif ($c1>>4 == 14) {  // 1110 xxxx, 1110 prefix for 3 bytes unicode
			$ret .= substr($str, $last, $i-$last); // append all the regular characters we've passed
			$c2 = ord($str[++$i]); // the next byte
			$c3 = ord($str[++$i]); // the third byte
			$c1 &= 15; // remove the 4 bit three bytes prefix
			$c2 &= 63;  // remove the 2 bit trailing byte prefix
			$c3 &= 63;  // remove the 2 bit trailing byte prefix
			$c3 |= (($c2 & 3) << 6); // last 2 bits of c2 become first 2 of c3
			$c2 >>=2; //c2 shifts 2 to the right
			$c2 |= (($c1 & 15) << 4); // last 4 bits of c1 become first 4 of c2
			$c1 >>= 4; // c1 shifts 4 to the right
			$ret .= '&#' . (($c1 * 0x10000) + ($c2 * 0x100) + $c3) . ';'; // this is the fastest string concatenation
			$last = $i+1;
		}
	}
	$str = $ret .
substr($str, $last, $i); // append the last batch of regular characters return $str; } /** * Convert numeric HTML entities in a string to UTF-8 encoded characters * This is a native alternative to the buggy html_entity_decode() using UTF8 * @param $input string input string * @return string */ function html2utf($str) { // convert named entities to numeric entities $str = strtr($str, String::getHTMLEntities()); // use PCRE-aware replace function to replace numeric entities $str = String::regexp_replace('~&#x([0-9a-f]+);~ei', 'String::code2utf(hexdec("\\1"))', $str); $str = String::regexp_replace('~&#([0-9]+);~e', 'String::code2utf(\\1)', $str); return $str; } /** * Convert UTF-8 numeric entities in a string to ASCII values * This is a helper function for transcoding into HTML/XML * @param $input string input string * @return string */ function html2ascii ($str) { // define the conversion table $entities = array( "~" => "~", " " => " ", "¡" => "!", "¦" => "|", "±" => "+/-", "²" => "2", "³" => "3", "´" => "'", "¹" => "1", "¼" => "1/4", "½" => "1/2", "¾" => "3/4", "¿" => "?", "À" => "A", "Á" => "A", "Â" => "A", "Ã" => "A", "Ä" => "A", "Å" => "A", "Æ" => "AE", "Ç" => "C", "È" => "E", "É" => "E", "Ê" => "E", "Ë" => "E", "Ì" => "I", "Í" => "I", "Î" => "I", "Ï" => "I", "Ð" => "D", "Ñ" => "N", "Ò" => "O", "Ó" => "O", "Ô" => "O", "Õ" => "O", "Ö" => "O", "×" => "x", "Ø" => "O", "Ù" => "U", "Ú" => "U", "Ü" => "U", "Ý" => "Y", "à" => "a", "á" => "a", "â" => "a", "ã" => "a", "ä" => "a", "å" => "a", "æ" => "ae", "ç" => "c", "è" => "e", "é" => "e", "ê" => "e", "ë" => "e", "ì" => "i", "í" => "i", "î" => "i", "ï" => "i", "ð" => "o", "ñ" => "n", "ò" => "o", "ó" => "o", "ô" => "o", "õ" => "o", "ö" => "o", "ø" => "o", "ù" => "u", "ú" => "u", "ü" => "u", "ý" => "y", "ÿ" => "y", "Œ" => "OE", "œ" => "oe", "Š" => "S", "š" => "s", "Ÿ" => "Y", "'" => "'", "ƒ" => "f", "-" => "-", "ˆ" => "^", "˜" => "~", " " => " ", " " => " ", " " => " ", "–" => "-", "—" => "--", "‘" => "'", "’" => "'", "‚" 
=> ",", "“" => '"', "”" => '"', "„" => ",,", "•" => "*", "…" => "...", "‰" => "%o", "′" => "'", "″" => "''", "™" => "TM", "−" => "-", "∗" => "*", "∧" => "/\\", "∨" => "\/", "∼" => "~", "⋅" => "*", "Α" => "A", "Β" => "B", "Ε" => "E", "Ζ" => "Z", "Η" => "H", "Ι" => "|", "Κ" => "K", "Μ" => "M", "Ν" => "N", "Ο" => "O", "Ρ" => "P", "Τ" => "T", "Υ" => "Y", "Χ" => "X", "^" => "^", "ο" => "o", "ρ" => "p", "ς" => "?", "ϑ" => "?", "ϖ" => "?"); return strtr($str, $entities); } /** * Convert Windows CP-1252 numeric entities in a string to named HTML entities * This is a helper function for transcoding into HTML/XML * @param $input string input string * @return string */ function cp1252ToEntities ($str) { // define the conversion table; from: http://www.noqta.it/tc.html $cp1252 = array( "€" => "", "" => "", "‚" => "‚", "ƒ" => "ƒ", "„" => "„", "…" => "…", "†" => "†", "‡" => "‡", "ˆ" => "", "‰" => "‰", "Š" => "Š", "‹" => "‹", "Œ" => "Œ", "" => "", "Ž" => "", "" => "", "" => "", "‘" => "‘", "’" => "’", "“" => "“", "”" => "”", "•" => "•", "–" => "–", "—" => "—", "˜" => "˜", "™" => "™", "š" => "š", "›" => "›", "œ" => "œ", "" => "", "ž" => "", "Ÿ" => "Ÿ"); // corrections to map to valid ISO entities $cp1252["‚"] = "‘"; $cp1252["„"] = "“"; $cp1252["’"] = "’"; $cp1252["”"] = "”"; return strtr($str, $cp1252); } /** * Return an associative array of named->numeric HTML entities * Required to support HTML functions without objects in PHP4/PHP5 * From php.net: function.get-html-translation-table.php * @return string */ function getHTMLEntities () { // define the conversion table $html_entities = array( "Á" => "Á", "á" => "á", "Â" => "Â", "â" => "â", "´" => "´", "Æ" => "Æ", "æ" => "æ", "À" => "À", "à" => "à", "ℵ" => "ℵ", "Α" => "Α", "α" => "α", "&" => "&", "∧" => "∧", "∠" => "∠", "'" => "'", "Å" => "Å", "å" => "å", "≈" => "≈", "Ã" => "Ã", "ã" => "ã", "Ä" => "Ä", "ä" => "ä", "„" => "„", "Β" => "Β", "β" => "β", "¦" => "¦", "•" => "•", "∩" => "∩", "Ç" => "Ç", "ç" => "ç", "¸" => "¸", "¢" => 
"¢", "Χ" => "Χ", "χ" => "χ", "ˆ" => "^", "♣" => "♣", "≅" => "≅", "©" => "©", "↵" => "↵", "∪" => "∪", "¤" => "¤", "†" => "†", "‡" => "‡", "↓" => "↓", "⇓" => "⇓", "°" => "°", "Δ" => "Δ", "δ" => "δ", "♦" => "♦", "÷" => "÷", "É" => "É", "é" => "é", "Ê" => "Ê", "ê" => "ê", "È" => "È", "è" => "è", "∅" => "∅", " " => " ", " " => " ", "Ε" => "Ε", "ε" => "ε", "≡" => "≡", "Η" => "Η", "η" => "η", "Ð" => "Ð", "ð" => "ð", "Ë" => "Ë", "ë" => "ë", "€" => "€", "∃" => "∃", "ƒ" => "ƒ", "∀" => "∀", "½" => "½", "¼" => "¼", "¾" => "¾", "⁄" => "⁄", "Γ" => "Γ", "γ" => "γ", "≥" => "≥", ">" => ">", "↔" => "↔", "⇔" => "⇔", "♥" => "♥", "…" => "…", "Í" => "Í", "í" => "í", "Î" => "Î", "î" => "î", "¡" => "¡", "Ì" => "Ì", "ì" => "ì", "ℑ" => "ℑ", "∞" => "∞", "∫" => "∫", "Ι" => "Ι", "ι" => "ι", "¿" => "¿", "∈" => "∈", "Ï" => "Ï", "ï" => "ï", "Κ" => "Κ", "κ" => "κ", "Λ" => "Λ", "λ" => "λ", "⟨" => "〈", "«" => "«", "←" => "←", "⇐" => "⇐", "⌈" => "⌈", "“" => "“", "≤" => "≤", "⌊" => "⌊", "∗" => "∗", "◊" => "◊", "‎" => "‎", "‹" => "‹", "‘" => "‘", "<" => "<", "¯" => "¯", "—" => "—", "µ" => "µ", "·" => "·", "−" => "-", "Μ" => "Μ", "μ" => "μ", "∇" => "∇", " " => " ", "–" => "–", "≠" => "≠", "∋" => "∋", "¬" => "¬", "∉" => "∉", "⊄" => "⊄", "Ñ" => "Ñ", "ñ" => "ñ", "Ν" => "Ν", "ν" => "ν", "Ó" => "Ó", "ó" => "ó", "Ô" => "Ô", "ô" => "ô", "Œ" => "Œ", "œ" => "œ", "Ò" => "Ò", "ò" => "ò", "‾" => "‾", "Ω" => "Ω", "ω" => "ω", "Ο" => "Ο", "ο" => "ο", "⊕" => "⊕", "∨" => "∨", "ª" => "ª", "º" => "º", "Ø" => "Ø", "ø" => "ø", "Õ" => "Õ", "õ" => "õ", "⊗" => "⊗", "Ö" => "Ö", "ö" => "ö", "¶" => "¶", "∂" => "∂", "‰" => "‰", "⊥" => "⊥", "Φ" => "Φ", "φ" => "φ", "Π" => "Π", "π" => "π", "ϖ" => "ϖ", "±" => "±", "£" => "£", "′" => "′", "″" => "″", "∏" => "∏", "∝" => "∝", "Ψ" => "Ψ", "ψ" => "ψ", """ => """, "√" => "√", "⟩" => "〉", "»" => "»", "→" => "→", "⇒" => "⇒", "⌉" => "⌉", "”" => "”", "ℜ" => "ℜ", "®" => "®", "⌋" => "⌋", "Ρ" => "Ρ", "ρ" => "ρ", "‏" => "‏", "›" => "›", "’" => "’", "‚" => "‚", "Š" => "Š", "š" => "š", "⋅" => "⋅", 
"§" => "§", "­" => "­", "Σ" => "Σ", "σ" => "σ", "ς" => "ς", "∼" => "∼", "♠" => "♠", "⊂" => "⊂", "⊆" => "⊆", "∑" => "∑", "¹" => "¹", "²" => "²", "³" => "³", "⊃" => "⊃", "⊇" => "⊇", "ß" => "ß", "Τ" => "Τ", "τ" => "τ", "∴" => "∴", "Θ" => "Θ", "θ" => "θ", "ϑ" => "ϑ", " " => " ", "Þ" => "Þ", "þ" => "þ", "˜" => "~", "×" => "×", "™" => "™", "Ú" => "Ú", "ú" => "ú", "↑" => "↑", "⇑" => "⇑", "Û" => "Û", "û" => "û", "Ù" => "Ù", "ù" => "ù", "¨" => "¨", "ϒ" => "ϒ", "Υ" => "Υ", "υ" => "υ", "Ü" => "Ü", "ü" => "ü", "℘" => "℘", "Ξ" => "Ξ", "ξ" => "ξ", "Ý" => "Ý", "ý" => "ý", "¥" => "¥", "ÿ" => "ÿ", "Ÿ" => "Ÿ", "Ζ" => "Ζ", "ζ" => "ζ", "‍" => "‍", "‌" => "‌"); return $html_entities; } } ?>