-
Notifications
You must be signed in to change notification settings - Fork 2
/
func.php
84 lines (76 loc) · 1.57 KB
/
func.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
<?php
/**
 * Count the number of bytes in a UTF-8 sequence, given its lead byte.
 *
 * Only the high nibble of the value is inspected:
 *   0xxx xxxx -> 1 byte
 *   110x xxxx -> 2 bytes (high nibble 0xC or 0xD)
 *   1110 xxxx -> 3 bytes (high nibble 0xE)
 *   1111 xxxx -> 4 bytes (high nibble 0xF)
 * Continuation bytes (10xx xxxx) are reported as 1.
 *
 * @param integer|string $unicode ordinal value of the lead byte, e.g. the
 *                                result of ord(); it is passed through intval()
 * @return integer sequence length in bytes (1 to 4)
 */
function bytes_cnt($unicode)
{
    $head = intval($unicode);
    $number = 1;
    switch ($head & 0xf0) {
        case 0xf0:
            $number = 4;
            break;
        case 0xe0:
            $number = 3;
            break;
        // A two-byte lead is 110xxxxx, so its high nibble can be 0xC or 0xD.
        // Matching 0xC alone miscounts every lead byte in 0xD0-0xDF.
        case 0xc0:
        case 0xd0:
            $number = 2;
            break;
    }
    return $number;
}
/**
 * Decode the first UTF-8 encoded character of a byte string to its code point.
 *
 * The sequence length is taken from the lead byte via bytes_cnt(). Only that
 * many bytes are read; continuation bytes are not validated, just masked to
 * their low six bits.
 *
 * @param string $unicode UTF-8 encoded bytes
 * @return integer code point of the leading character
 */
function utf8_to_cop($unicode)
{
    $lead = ord(substr($unicode, 0, 1));
    $length = bytes_cnt($lead);

    // Single-byte sequence: the byte value is the code point.
    if ($length <= 1) {
        return $lead;
    }

    // Strip the length marker from the lead byte, keeping its payload bits
    // (5 bits for 2-byte, 4 for 3-byte, 3 for 4-byte sequences).
    $value = $lead & (0x7f >> $length);

    // Append six payload bits from each following byte.
    $pos = 1;
    while ($pos < $length) {
        $byte = ord(substr($unicode, $pos, 1));
        $value = ($value << 6) | ($byte & 0x3f);
        $pos++;
    }

    return $value;
}
/**
 * Encode a Unicode code point as a UTF-8 string.
 *
 * The code point is packed as a 32-bit big-endian unsigned integer and
 * converted from UCS-4BE to UTF-8 by iconv(), using the //IGNORE flag.
 *
 * @param integer $codepoint
 * @return string the result of iconv() for that code point
 */
function cop_to_utf8($codepoint)
{
    $ucs4 = pack('N', $codepoint);

    return iconv('UCS-4BE', 'UTF-8//IGNORE', $ucs4);
}
/**
 * Iterate over a UTF-8 encoded string one character at a time.
 *
 * Each yielded value is the byte sequence of one character; its length is
 * determined from the lead byte by bytes_cnt(). An empty string yields
 * nothing. If the final sequence is truncated, the remaining bytes are
 * yielded as they are.
 *
 * @param string $string UTF-8 encoded bytes
 * @return Generator yields one string per character
 */
function utf8_cursor($string)
{
    $size = strlen($string);
    $i = 0;
    while ($i < $size) {
        // The lead byte is read only for in-range offsets, so an empty string
        // and the end of the input cause no out-of-range access and no error
        // suppression is needed.
        $step = bytes_cnt(ord($string[$i]));
        yield substr($string, $i, $step);
        $i += $step;
    }
}