root/include/functions_utf.php

Revision 280, 19.1 kB (checked in by Nafania, 2 years ago)

Немного новых запросов к базе. Улучшен PHP-шный анонсер.
Правка мелких багов и добавление мелких фич.
Добавлена фича SEO-оптимизации, но убедительная просьба — не включать её: она ещё не до конца готова, и поддержки я оказывать не буду.

Line 
1 <?php
2
3 if (extension_loaded('mbstring'))
4 {
5
6     mb_internal_encoding('utf-8');
7
8     if (version_compare(PHP_VERSION, '5.2.0', '>='))
9     {
10         /**
11         * UTF-8 aware alternative to strrpos
12         * @ignore
13         */
14         function utf_strrpos($str,    $needle, $offset = null)
15         {
16             // Emulate behaviour of strrpos rather than raising warning
17             if (empty($str))
18             {
19                 return false;
20             }
21
22             if (is_null($offset))
23             {
24                 return mb_strrpos($str, $needle);
25             }
26             else
27             {
28                 return mb_strrpos($str, $needle, $offset);
29             }
30         }
31     }
32     else
33     {
34         /**
35         * UTF-8 aware alternative to strrpos
36         * @ignore
37         */
38         function utf_strrpos($str,    $needle, $offset = null)
39         {
40             // offset for mb_strrpos was added in 5.2.0
41             if (is_null($offset))
42             {
43                 // Emulate behaviour of strrpos rather than raising warning
44                 if (empty($str))
45                 {
46                     return false;
47                 }
48
49                 return mb_strrpos($str, $needle);
50             }
51             else
52             {
53                 if (!is_int($offset))
54                 {
55                     trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
56                     return false;
57                 }
58
59                 $str = mb_substr($str, $offset);
60
61                 if (false !== ($pos = mb_strrpos($str, $needle)))
62                 {
63                     return $pos + $offset;
64                 }
65
66                 return false;
67             }
68         }
69     }
70
71     function utf_strtolower($str)
72     {
73         return mb_strtolower($str);
74     }
75
76     function utf_strtoupper($str)
77     {
78         return mb_strtoupper($str);
79     }
80
81     function utf_substr($str, $offset, $length = null)
82     {
83         if (is_null($length))
84         {
85             return mb_substr($str, $offset);
86         }
87         else
88         {
89             return mb_substr($str, $offset, $length);
90         }
91     }
92
93     function utf_strlen($text)
94     {
95         return mb_strlen($text, 'utf-8');
96     }
97 }
98 else {
99     /**
100     * UTF-8 aware alternative to strrpos
101     * Find position of last occurrence of a char in a string
102     *
103     * @author Harry Fuecks
104     * @param string $str haystack
105     * @param string $needle needle
106     * @param integer $offset (optional) offset (from left)
107     * @return mixed integer position or FALSE on failure
108     */
109     function utf_strrpos($str,    $needle, $offset = null)
110     {
111         if (is_null($offset))
112         {
113             $ar    = explode($needle, $str);
114
115             if (sizeof($ar) > 1)
116             {
117                 // Pop off the end of the string where the last    match was made
118                 array_pop($ar);
119                 $str = join($needle, $ar);
120
121                 return utf_strlen($str);
122             }
123             return false;
124         }
125         else
126         {
127             if (!is_int($offset))
128             {
129                 trigger_error('utf_strrpos    expects    parameter 3    to be long', E_USER_ERROR);
130                 return false;
131             }
132
133             $str = utf_substr($str, $offset);
134
135             if (false !== ($pos = utf_strrpos($str, $needle)))
136             {
137                 return $pos    + $offset;
138             }
139
140             return false;
141         }
142     }
143
144     /**
145     * UTF-8 aware alternative to strpos
146     * Find position of first occurrence of a string
147     *
148     * @author Harry Fuecks
149     * @param string $str haystack
150     * @param string $needle needle
151     * @param integer $offset offset in characters (from left)
152     * @return mixed integer position or FALSE on failure
153     */
154     function utf_strpos($str, $needle, $offset = null)
155     {
156         if (is_null($offset))
157         {
158             $ar = explode($needle, $str);
159             if (sizeof($ar) > 1)
160             {
161                 return utf_strlen($ar[0]);
162             }
163             return false;
164         }
165         else
166         {
167             if (!is_int($offset))
168             {
169                 trigger_error('utf_strpos:  Offset must  be an integer', E_USER_ERROR);
170                 return false;
171             }
172
173             $str = utf_substr($str, $offset);
174
175             if (false !== ($pos = utf_strpos($str, $needle)))
176             {
177                 return $pos + $offset;
178             }
179
180             return false;
181         }
182     }
183
184     /**
185     * UTF-8 aware alternative to strtolower
186     * Make a string lowercase
187     * Note: The concept of a characters "case" only exists is some alphabets
188     * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
189     * not exist in the Chinese alphabet, for example. See Unicode Standard
190     * Annex #21: Case Mappings
191     *
192     * @param string
193     * @return string string in lowercase
194     */
195     function utf_strtolower($string)
196     {
197         static $utf8_upper_to_lower = array(
198             "\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
199             "\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
200             "\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
201             "\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
202             "\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
203             "\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
204             "\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
205             "\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
206             "\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
207             "\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
208             "\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
209             "\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
210             "\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
211             "\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
212             "\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
213             "\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
214             "\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
215             "\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
216             "\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
217             "\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
218             "\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
219             "\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
220             "\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
221             "\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
222             "\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
223             "\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
224             "\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
225             "\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
226             "\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
227             "\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
228             "\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
229             "\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
230             "\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
231             "\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
232             "\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
233             "\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
234             "\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
235             "\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
236             "\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
237             "\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
238             "\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
239             "\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
240             "\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
241             "\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
242             "\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
243             "\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
244         );
245
246         return strtr(strtolower($string), $utf8_upper_to_lower);
247     }
248
249     /**
250     * UTF-8 aware alternative to strtoupper
251     * Make a string uppercase
252     * Note: The concept of a characters "case" only exists is some alphabets
253     * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
254     * not exist in the Chinese alphabet, for example. See Unicode Standard
255     * Annex #21: Case Mappings
256     *
257     * @param string
258     * @return string string in uppercase
259     */
260     function utf_strtoupper($string)
261     {
262         static $utf8_lower_to_upper = array(
263             "\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
264             "\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
265             "\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
266             "\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
267             "\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
268             "\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
269             "\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
270             "\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
271             "\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
272             "\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
273             "\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
274             "\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
275             "\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
276             "\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
277             "\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
278             "\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
279             "\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
280             "\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
281             "\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
282             "\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
283             "\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
284             "\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
285             "\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
286             "\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
287             "\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
288             "\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
289             "\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
290             "\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
291             "\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
292             "\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
293             "\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
294             "\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
295             "\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
296             "\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
297             "\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
298             "\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
299             "\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
300             "\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
301             "\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
302             "\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
303             "\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
304             "\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
305             "\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
306             "\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
307             "\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
308             "\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
309         );
310
311         return strtr(strtoupper($string), $utf8_lower_to_upper);
312     }
313
314     /**
315     * UTF-8 aware alternative to substr
316     * Return part of a string given character offset (and optionally length)
317     *
318     * Note arguments: comparied to substr - if offset or length are
319     * not integers, this version will not complain but rather massages them
320     * into an integer.
321     *
322     * Note on returned values: substr documentation states false can be
323     * returned in some cases (e.g. offset > string length)
324     * mb_substr never returns false, it will return an empty string instead.
325     * This adopts the mb_substr approach
326     *
327     * Note on implementation: PCRE only supports repetitions of less than
328     * 65536, in order to accept up to MAXINT values for offset and length,
329     * we'll repeat a group of 65535 characters when needed.
330     *
331     * Note on implementation: calculating the number of characters in the
332     * string is a relatively expensive operation, so we only carry it out when
333     * necessary. It isn't necessary for +ve offsets and no specified length
334     *
335     * @author Chris Smith<chris@jalakai.co.uk>
336     * @param string $str
337     * @param integer $offset number of UTF-8 characters offset (from left)
338     * @param integer $length (optional) length in UTF-8 characters from offset
339     * @return mixed string or FALSE if failure
340     */
341     function utf_substr($str, $offset, $length = NULL)
342     {
343         // generates E_NOTICE
344         // for PHP4 objects, but not PHP5 objects
345         $str = (string) $str;
346         $offset = (int) $offset;
347         if (!is_null($length))
348         {
349             $length = (int) $length;
350         }
351
352         // handle trivial cases
353         if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
354         {
355             return '';
356         }
357
358         // normalise negative offsets (we could use a tail
359         // anchored pattern, but they are horribly slow!)
360         if ($offset < 0)
361         {
362             // see notes
363             $strlen = utf_strlen($str);
364             $offset = $strlen + $offset;
365             if ($offset < 0)
366             {
367                 $offset = 0;
368             }
369         }
370
371         $op = '';
372         $lp = '';
373
374         // establish a pattern for offset, a
375         // non-captured group equal in length to offset
376         if ($offset > 0)
377         {
378             $ox = (int) ($offset / 65535);
379             $oy = $offset % 65535;
380
381             if ($ox)
382             {
383                 $op = '(?:.{65535}){' . $ox . '}';
384             }
385
386             $op = '^(?:' . $op . '.{' . $oy . '})';
387         }
388         else
389         {
390             // offset == 0; just anchor the pattern
391             $op = '^';
392         }
393
394         // establish a pattern for length
395         if (is_null($length))
396         {
397             // the rest of the string
398             $lp = '(.*)$';
399         }
400         else
401         {
402             if (!isset($strlen))
403             {
404                 // see notes
405                 $strlen = utf_strlen($str);
406             }
407
408             // another trivial case
409             if ($offset > $strlen)
410             {
411                 return '';
412             }
413
414             if ($length > 0)
415             {
416                 // reduce any length that would
417                 // go passed the end of the string
418                 $length = min($strlen - $offset, $length);
419
420                 $lx = (int) ($length / 65535);
421                 $ly = $length % 65535;
422
423                 // negative length requires a captured group
424                 // of length characters
425                 if ($lx)
426                 {
427                     $lp = '(?:.{65535}){' . $lx . '}';
428                 }
429                 $lp = '(' . $lp . '.{'. $ly . '})';
430             }
431             else if ($length < 0)
432             {
433                 if ($length < ($offset - $strlen))
434                 {
435                     return '';
436                 }
437
438                 $lx = (int)((-$length) / 65535);
439                 $ly = (-$length) % 65535;
440
441                 // negative length requires ... capture everything
442                 // except a group of  -length characters
443                 // anchored at the tail-end of the string
444                 if ($lx)
445                 {
446                     $lp = '(?:.{65535}){' . $lx . '}';
447                 }
448                 $lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
449             }
450         }
451
452         if (!preg_match('#' . $op . $lp . '#us', $str, $match))
453         {
454             return '';
455         }
456
457         return $match[1];
458     }
459
460     /**
461     * Return the length (in characters) of a UTF-8 string
462     *
463     * @param    string    $text        UTF-8 string
464     * @return    integer                Length (in chars) of given string
465     */
466     function utf_strlen($text)
467     {
468         // Since utf8_decode is replacing multibyte characters to ? strlen works fine
469         return strlen(utf8_decode($text));
470     }
471 }
472
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* If the string cannot be split into first character and remainder
* (preg_match fails, e.g. because the input is not valid UTF-8), the
* string is returned unchanged.
*
* @author Harry Fuecks
* @param string $str UTF-8 string
* @return string with first character as upper case (if applicable)
*/
function utf_ucfirst($str)
{
    switch (utf_strlen($str))
    {
        case 0:
            return '';

        case 1:
            return utf_strtoupper($str);

        default:
            // $matches[1] = first character, $matches[2] = everything else
            if (!preg_match('/^(.{1})(.*)$/us', $str, $matches))
            {
                return $str;
            }

            return utf_strtoupper($matches[1]) . $matches[2];
    }
}
499
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* The pattern takes up to $split_len characters per match, so the last
* chunk simply holds whatever is left over. The /s modifier lets "."
* match newlines as well.
*
* @author Harry Fuecks
* @param string $str UTF-8 encoded
* @param int $split_len number of characters per chunk
* @return mixed array of chunks, or FALSE if $split_len is not an integer >= 1
*               or the string could not be split (preg_match_all failed)
*/
function utf_str_split($str, $split_len = 1)
{
    if (!is_int($split_len) || $split_len < 1)
    {
        return false;
    }

    // Short enough to fit into a single chunk
    if (utf_strlen($str) <= $split_len)
    {
        return array($str);
    }

    if (!preg_match_all('/.{1,' . $split_len . '}/us', $str, $chunks))
    {
        return false;
    }

    return $chunks[0];
}
525 ?>
Note: See TracBrowser for help on using the browser.