| 1 |
<?php |
|---|
| 2 |
|
|---|
| 3 |
if (extension_loaded('mbstring'))
|---|
| 4 |
{
|---|
| 5 |
|
|---|
| 6 |
mb_internal_encoding('utf-8'); |
|---|
| 7 |
|
|---|
| 8 |
if (version_compare(PHP_VERSION, '5.2.0', '>=')) |
|---|
| 9 |
{ |
|---|
| 10 |
|
|---|
| 11 |
/**
* UTF-8 aware alternative to strrpos
* Find the character position of the last occurrence of $needle in $str by
* delegating to mb_strrpos().
*
* @ignore
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset, handed to mb_strrpos() unchanged
* @return mixed integer position, or FALSE if not found or the haystack is empty
*/
function utf_strrpos($str, $needle, $offset = null)
{
	// An empty haystack yields false without calling mb_strrpos().
	// empty() on its own would also reject '0' / 0, which are perfectly
	// valid one-character haystacks, so numeric values are let through.
	if (empty($str) && !is_numeric($str))
	{
		return false;
	}

	// Only pass an offset on when the caller actually supplied one
	if (is_null($offset))
	{
		return mb_strrpos($str, $needle);
	}

	return mb_strrpos($str, $needle, $offset);
}
|---|
| 31 |
} |
|---|
| 32 |
else |
|---|
| 33 |
{ |
|---|
| 34 |
|
|---|
| 35 |
/**
* UTF-8 aware alternative to strrpos
* Variant defined when PHP is older than 5.2.0. mb_strrpos() is called without
* an offset here; an offset is emulated by cutting the haystack with
* mb_substr() and adding the offset back onto the position that was found.
*
* @ignore
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset in characters (from left)
* @return mixed integer position, or FALSE if not found, if the (remaining)
*               haystack is empty or if $offset is not an integer
*/
function utf_strrpos($str, $needle, $offset = null)
{
	$shift = 0;

	if (!is_null($offset))
	{
		if (!is_int($offset))
		{
			trigger_error('utf_strrpos expects parameter 3 to be long', E_USER_ERROR);
			return false;
		}

		// NOTE(review): mb_substr() cuts a negative offset from the end of the
		// string, so the position computed below is not meaningful for it -
		// confirm callers only pass offsets >= 0.
		$str = mb_substr($str, $offset);
		$shift = $offset;
	}

	// Return false for an empty haystack instead of handing it to
	// mb_strrpos(). '0' / 0 are valid haystacks although empty() rejects them.
	if (empty($str) && !is_numeric($str))
	{
		return false;
	}

	$pos = mb_strrpos($str, $needle);

	// Translate the position inside the cut string back to the full string
	return ($pos === false) ? false : $pos + $shift;
}
|---|
| 69 |
} |
|---|
| 70 |
|
|---|
| 71 |
/**
* Make a string lowercase using mb_strtolower()
* No encoding is passed, so the mbstring internal encoding applies.
*
* @param string $str
* @return string string in lowercase
*/
function utf_strtolower($str)
{
	$lowered = mb_strtolower($str);

	return $lowered;
}
|---|
| 75 |
|
|---|
| 76 |
/**
* Make a string uppercase using mb_strtoupper()
* No encoding is passed, so the mbstring internal encoding applies.
*
* @param string $str
* @return string string in uppercase
*/
function utf_strtoupper($str)
{
	$raised = mb_strtoupper($str);

	return $raised;
}
|---|
| 80 |
|
|---|
| 81 |
/**
* Return part of a string given a character offset (and optionally a length)
* using mb_substr()
*
* @param string $str
* @param integer $offset offset in characters
* @param integer $length (optional) length in characters from offset
* @return string the selected part of $str
*/
function utf_substr($str, $offset, $length = null)
{
	// mb_substr() only receives a length when the caller supplied one
	return ($length === null)
		? mb_substr($str, $offset)
		: mb_substr($str, $offset, $length);
}
|---|
| 92 |
|
|---|
| 93 |
/**
* Return the length (in characters) of a UTF-8 string using mb_strlen()
*
* @param string $text UTF-8 string
* @return integer Length (in chars) of given string
*/
function utf_strlen($text)
{
	// The encoding is always given explicitly rather than taken from the
	// mbstring internal encoding
	$encoding = 'utf-8';

	return mb_strlen($text, $encoding);
}
|---|
| 97 |
} |
|---|
| 98 |
else { |
|---|
| 99 |
|
|---|
| 100 |
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a char in a string
*
* Works without mbstring: the haystack is split on the needle and the
* character length of everything in front of the last piece is the position.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset (optional) offset (from left)
* @return mixed integer position or FALSE on failure
*/
function utf_strrpos($str, $needle, $offset = null)
{
	if (!is_null($offset))
	{
		if (!is_int($offset))
		{
			trigger_error('utf_strrpos expects parameter 3 to be long', E_USER_ERROR);
			return false;
		}

		// Search only the part behind the offset, then shift the result back.
		// NOTE(review): utf_substr() cuts a negative offset from the end, so
		// the shifted position is not meaningful for it - confirm callers
		// only pass offsets >= 0.
		$str = utf_substr($str, $offset);
		$pos = utf_strrpos($str, $needle);

		return ($pos === false) ? false : $pos + $offset;
	}

	// explode() cannot split on an empty delimiter (warning on older PHP,
	// ValueError on PHP 8); an empty needle is reported as "not found".
	if ((string) $needle === '')
	{
		return false;
	}

	$pieces = explode($needle, $str);

	// A single piece means the needle does not occur at all
	if (count($pieces) < 2)
	{
		return false;
	}

	// Drop whatever follows the last occurrence; the rest, glued back
	// together, is exactly the text in front of that occurrence.
	array_pop($pieces);

	return utf_strlen(implode($needle, $pieces));
}
|---|
| 143 |
|
|---|
| 144 |
|
|---|
| 145 |
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string
*
* Works without mbstring: the haystack is split on the needle and the
* character length of the first piece is the position.
*
* @author Harry Fuecks
* @param string $str haystack
* @param string $needle needle
* @param integer $offset offset in characters (from left); a negative value
*                        counts from the end of the string
* @return mixed integer position or FALSE on failure
*/
function utf_strpos($str, $needle, $offset = null)
{
	if (!is_null($offset))
	{
		if (!is_int($offset))
		{
			trigger_error('utf_strpos: Offset must be an integer', E_USER_ERROR);
			return false;
		}

		// Turn an offset counted from the end into one counted from the
		// start (clamped at 0). Adding a negative offset to the position
		// found in the tail would otherwise give a meaningless result.
		if ($offset < 0)
		{
			$offset = max(0, utf_strlen($str) + $offset);
		}

		// Search only the part behind the offset, then shift the result back
		$str = utf_substr($str, $offset);
		$pos = utf_strpos($str, $needle);

		return ($pos === false) ? false : $pos + $offset;
	}

	// explode() cannot split on an empty delimiter (warning on older PHP,
	// ValueError on PHP 8); an empty needle is reported as "not found".
	if ((string) $needle === '')
	{
		return false;
	}

	$pieces = explode($needle, $str);

	// More than one piece means the needle occurs; everything in front of
	// the first occurrence is the first piece.
	return (count($pieces) > 1) ? utf_strlen($pieces[0]) : false;
}
|---|
| 183 |
|
|---|
| 184 |
|
|---|
| 185 |
/**
* UTF-8 aware alternative to strtolower
* Make a string lowercase
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* ASCII letters are mapped byte-wise, all other cased characters through the
* lookup table below; characters that are not in the table are left as is.
*
* @param string
* @return string string in lowercase
*/
function utf_strtolower($string)
{
	static $utf8_upper_to_lower = array(
		"\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
		"\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
		"\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
		"\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
		"\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
		"\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
		"\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
		"\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
		"\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
		"\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
		"\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
		"\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
		"\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
		"\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
		"\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
		"\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
		"\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
		"\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
		"\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
		"\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
		"\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
		"\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
		"\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
		"\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
		"\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
		"\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
		"\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
		"\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
		"\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
		"\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
		"\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
		"\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
		"\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
		"\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
		"\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
		"\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
		"\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
		"\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
		"\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
		"\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
		"\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
		"\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
		"\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
		"\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
		"\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
		"\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
	);

	// Lowercase A-Z with a fixed byte map rather than strtolower(): before
	// PHP 8.2 strtolower() follows the current locale and, under a single-byte
	// locale, rewrites bytes >= 0x80 that belong to multi-byte UTF-8 sequences.
	// ASCII bytes never occur inside such a sequence, so this map is safe.
	$ascii_lowered = strtr($string, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');

	return strtr($ascii_lowered, $utf8_upper_to_lower);
}
|---|
| 248 |
|
|---|
| 249 |
|
|---|
| 250 |
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* ASCII letters are mapped byte-wise, all other cased characters through the
* lookup table below; characters that are not in the table are left as is.
*
* @param string
* @return string string in uppercase
*/
function utf_strtoupper($string)
{
	static $utf8_lower_to_upper = array(
		"\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
		"\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
		"\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
		"\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
		"\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
		"\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
		"\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
		"\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
		"\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
		"\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
		"\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
		"\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
		"\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
		"\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
		"\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
		"\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
		"\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
		"\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
		"\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
		"\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
		"\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
		"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
		"\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
		"\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
		"\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
		"\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
		"\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
		"\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
		"\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
		"\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
		"\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
		"\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
		"\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
		"\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
		"\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
		"\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
		"\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
		"\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
		"\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
		"\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
		"\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
		"\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
		"\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
		"\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
		"\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
		"\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
	);

	// Uppercase a-z with a fixed byte map rather than strtoupper(): before
	// PHP 8.2 strtoupper() follows the current locale and, under a single-byte
	// locale, rewrites bytes >= 0x80 that belong to multi-byte UTF-8 sequences.
	// ASCII bytes never occur inside such a sequence, so this map is safe.
	$ascii_raised = strtr($string, 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');

	return strtr($ascii_raised, $utf8_lower_to_upper);
}
|---|
| 313 |
|
|---|
| 314 |
|
|---|
| 315 |
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* Note arguments: compared to substr - if offset or length are
* not integers, this version will not complain but rather massages them
* into an integer.
*
* Note on returned values: substr documentation states false can be
* returned in some cases (e.g. offset > string length)
* mb_substr never returns false, it will return an empty string instead.
* This adopts the mb_substr approach
*
* Note on implementation: PCRE only supports repetitions of less than
* 65536, in order to accept up to MAXINT values for offset and length,
* we'll repeat a group of 65535 characters when needed.
*
* Note on implementation: calculating the number of characters in the
* string is a relatively expensive operation, so we only carry it out when
* necessary. It isn't necessary for +ve offsets and no specified length
*
* @author Chris Smith<chris@jalakai.co.uk>
* @param string $str
* @param integer $offset number of UTF-8 characters offset (from left)
* @param integer $length (optional) length in UTF-8 characters from offset
* @return string the selected part, or an empty string when nothing matches
*/
function utf_substr($str, $offset, $length = NULL)
{
	// Force the argument types up front; a missing length stays null
	$str = (string) $str;
	$offset = (int) $offset;
	$length = is_null($length) ? null : (int) $length;

	// Cases in which nothing can be selected
	if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
	{
		return '';
	}

	// Character count of $str, only worked out once it is really needed
	$char_count = null;

	// Turn an offset counted from the end into one counted from the start,
	// clamped at the beginning of the string
	if ($offset < 0)
	{
		$char_count = utf_strlen($str);
		$offset = max(0, $char_count + $offset);
	}

	// Pattern part for the offset: a non-capturing group that consumes
	// exactly $offset characters, in blocks of 65535 plus a remainder
	if ($offset > 0)
	{
		$blocks = (int) ($offset / 65535);
		$rest = $offset % 65535;

		$skip = '^(?:' . ($blocks ? '(?:.{65535}){' . $blocks . '}' : '') . '.{' . $rest . '})';
	}
	else
	{
		// Nothing to skip, just anchor at the start
		$skip = '^';
	}

	// Pattern part for the length: the capturing group that is returned
	$take = '';

	if (is_null($length))
	{
		// Everything up to the end of the string
		$take = '(.*)$';
	}
	else
	{
		if ($char_count === null)
		{
			$char_count = utf_strlen($str);
		}

		// The offset lies behind the end of the string
		if ($offset > $char_count)
		{
			return '';
		}

		if ($length > 0)
		{
			// Never ask for more characters than are left after the offset
			$length = min($char_count - $offset, $length);

			$blocks = (int) ($length / 65535);
			$rest = $length % 65535;

			// Capture exactly $length characters
			$take = '(' . ($blocks ? '(?:.{65535}){' . $blocks . '}' : '') . '.{' . $rest . '})';
		}
		else if ($length < 0)
		{
			// More characters to leave off than remain after the offset
			if ($length < ($offset - $char_count))
			{
				return '';
			}

			$blocks = (int) ((-$length) / 65535);
			$rest = (-$length) % 65535;

			// Capture everything except a group of -$length characters
			// anchored at the tail-end of the string
			$take = '(.*)(?:' . ($blocks ? '(?:.{65535}){' . $blocks . '}' : '') . '.{' . $rest . '})$';
		}
	}

	if (!preg_match('#' . $skip . $take . '#us', $str, $match))
	{
		return '';
	}

	return $match[1];
}
|---|
| 459 |
|
|---|
| 460 |
|
|---|
| 461 |
/**
* Return the length (in characters) of a UTF-8 string
*
* @param string $text UTF-8 string
* @return integer Length (in chars) of given string
*/
function utf_strlen($text)
{
	// utf8_decode() turns every UTF-8 character into a single byte, so the
	// byte length of its result equals the number of characters
	$single_byte = utf8_decode($text);

	return strlen($single_byte);
}
|---|
| 471 |
} |
|---|
| 472 |
|
|---|
| 473 |
|
|---|
| 474 |
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* @author Harry Fuecks
* @param string
* @return string with first character as upper case (if applicable)
*/
function utf_ucfirst($str)
{
	$length = utf_strlen($str);

	if ($length == 0)
	{
		return '';
	}

	// A single character is uppercased as a whole
	if ($length == 1)
	{
		return utf_strtoupper($str);
	}

	// Split into the first character and the rest. If the pattern cannot be
	// applied, an empty string is returned - the same result as before, but
	// without reading undefined offsets of $matches.
	if (!preg_match('/^(.{1})(.*)$/us', $str, $matches))
	{
		return '';
	}

	return utf_strtoupper($matches[1]) . $matches[2];
}
|---|
| 499 |
|
|---|
| 500 |
|
|---|
| 501 |
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters to split string by
* @return mixed array of chunks of at most $split_len characters each, or
*               FALSE if $split_len is not an integer >= 1
*/
function utf_str_split($str, $split_len = 1)
{
	if (!is_int($split_len) || $split_len < 1)
	{
		return false;
	}

	// A string that fits into one chunk is returned as that single chunk
	if (utf_strlen($str) <= $split_len)
	{
		return array($str);
	}

	// Greedy {1,n} yields full chunks of n characters followed by whatever is
	// left over. With the /s modifier '.' matches every character, so a
	// trailing chunk that contains a NUL character is kept as well.
	preg_match_all('/.{1,' . $split_len . '}/us', $str, $chunks);

	return $chunks[0];
}
|---|
| 525 |
?> |
|---|