PHP 转编码函数
PHP转编码函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
<?php
/**
 * Check whether a byte string consists solely of well-formed UTF-8 text.
 *
 * Accepted: tab, LF, CR, printable ASCII (0x20-0x7E) and multi-byte
 * sequences for U+00A0 through U+10FFFF. Rejected: other control bytes,
 * DEL, the C1 range U+0080-U+009F, overlong encodings and surrogates.
 * An empty string is accepted.
 *
 * @param string $str Bytes to inspect.
 * @return int|false Result of preg_match(): 1 when valid, 0 when not,
 *                   false if the regex engine reports an error.
 */
function isUTF8($str)
{
    $wellFormedUtf8 = '/^([\x09\x0A\x0D\x20-\x7E]|[\xC2][\xA0-\xBF]|[\xC3-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*$/';

    $verdict = preg_match($wellFormedUtf8, $str);

    return $verdict;
}
/**
 * Check whether every byte of a string is a printable ISO-8859-1 character.
 *
 * Accepted bytes: tab, LF, CR, 0x20-0x7E and 0xA0-0xFF. An empty string
 * is accepted.
 *
 * @param string $str Bytes to inspect.
 * @return int|false Result of preg_match(): 1 when every byte is accepted,
 *                   0 otherwise, false if the regex engine reports an error.
 */
function isISO88591($str)
{
    $latin1Only = '/^([\x09\x0A\x0D\x20-\x7E\xA0-\xFF])*$/';

    $verdict = preg_match($latin1Only, $str);

    return $verdict;
}
/**
 * Check whether every byte of a string is a defined, printable
 * Windows-1252 character.
 *
 * Accepted bytes: tab, LF, CR, 0x20-0x7E, 0xA0-0xFF and the bytes in
 * 0x80-0x9F other than 0x81, 0x8D, 0x8F, 0x90 and 0x9D. An empty string
 * is accepted.
 *
 * @param string $str Bytes to inspect.
 * @return int|false Result of preg_match(): 1 when every byte is accepted,
 *                   0 otherwise, false if the regex engine reports an error.
 */
function isCP1252($str)
{
    $cp1252Only = '/^([\x09\x0A\x0D\x20-\x7E\x80\x82-\x8C\x8E\x91-\x9C\x9E-\xFF])*$/';

    $verdict = preg_match($cp1252Only, $str);

    return $verdict;
}
/**
 * Generate the UTF-8 byte sequence for a Unicode code point.
 *
 * Special cases:
 *  - U+FEFF (byte order mark) is deliberately dropped and yields ''.
 *  - Surrogates (U+D800-U+DFFF), negative values and values above
 *    U+10FFFF cannot be encoded and yield false.
 *
 * @param int $codePoint Unicode code point.
 * @return string|false One to four bytes of UTF-8, '' for the BOM,
 *                      or false when the code point is not encodable.
 */
function utf8Char($codePoint)
{
    // Out of range on either side.
    if ($codePoint < 0 || $codePoint > 0x10ffff) {
        return false;
    }

    // Surrogate halves are not characters on their own.
    if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
        return false;
    }

    // Zap the BOM. Loose comparison is kept so numeric strings and floats
    // are treated the same way as before.
    if ($codePoint == 0xFEFF) {
        return '';
    }

    // 1 byte: 0xxxxxxx
    if ($codePoint <= 0x007f) {
        return chr($codePoint);
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if ($codePoint <= 0x07ff) {
        return chr(0xc0 | ($codePoint >> 6))
            . chr(0x80 | ($codePoint & 0x3f));
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($codePoint <= 0xffff) {
        return chr(0xe0 | ($codePoint >> 12))
            . chr(0x80 | (($codePoint >> 6) & 0x3f))
            . chr(0x80 | ($codePoint & 0x3f));
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xf0 | ($codePoint >> 18))
        . chr(0x80 | (($codePoint >> 12) & 0x3f))
        . chr(0x80 | (($codePoint >> 6) & 0x3f))
        . chr(0x80 | ($codePoint & 0x3f));
}
/**
 * Convert a single Windows-1252 byte from the 0x80-0x9F range to UTF-8.
 *
 * Used as the per-character step of utf8FromCP1252(). Only the first
 * byte of $char is examined.
 *
 * @param string $char A one-byte string holding the CP1252 byte.
 * @return string|false The UTF-8 encoding of the mapped character, or
 *                      false when the byte has no mapping here (the five
 *                      undefined CP1252 positions 0x81, 0x8D, 0x8F, 0x90,
 *                      0x9D, and any byte outside 0x80-0x9F).
 */
function utf8FromCP1252Char($char)
{
    // CP1252 byte => Unicode code point. Undefined positions are simply
    // absent so they can be detected with isset() below.
    static $utf8CodePoint = array(
        0x80 => 0x20AC,
        0x82 => 0x201A,
        0x83 => 0x0192,
        0x84 => 0x201E,
        0x85 => 0x2026,
        0x86 => 0x2020,
        0x87 => 0x2021,
        0x88 => 0x02C6,
        0x89 => 0x2030,
        0x8A => 0x0160,
        0x8B => 0x2039,
        0x8C => 0x0152,
        0x8E => 0x017D,
        0x91 => 0x2018,
        0x92 => 0x2019,
        0x93 => 0x201C,
        0x94 => 0x201D,
        0x95 => 0x2022,
        0x96 => 0x2013,
        0x97 => 0x2014,
        0x98 => 0x02DC,
        0x99 => 0x2122,
        0x9A => 0x0161,
        0x9B => 0x203A,
        0x9C => 0x0153,
        0x9E => 0x017E,
        0x9F => 0x0178,
    );

    $cp1252CodePoint = ord($char);

    // Fail explicitly instead of reading a missing index and handing a
    // non-integer to utf8Char().
    if (!isset($utf8CodePoint[$cp1252CodePoint])) {
        return false;
    }

    return utf8Char($utf8CodePoint[$cp1252CodePoint]);
}
/**
 * Convert the encoding of a string from Windows-1252 to UTF-8.
 *
 * The input is first validated with isCP1252(). Bytes below 0x80 are
 * copied unchanged. Bytes 0x80-0x9F are translated through
 * utf8FromCP1252Char(), and bytes 0xA0-0xFF are encoded directly because
 * their value equals their Unicode code point.
 *
 * @param string $string Windows-1252 encoded text.
 * @return string|null The UTF-8 text, '' when $string is not accepted by
 *                     isCP1252(), or null if preg_replace_callback() fails.
 */
function utf8FromCP1252($string)
{
    if (!isCP1252($string)) {
        return '';
    }

    // A closure replaces create_function() (removed in PHP 8.0), and the
    // byte-wise pass replaces utf8_encode() (deprecated in PHP 8.2).
    return preg_replace_callback(
        '/[\x80-\xFF]/',
        function ($match) {
            $byte = ord($match[0]);

            if ($byte <= 0x9F) {
                return utf8FromCP1252Char($match[0]);
            }

            return utf8Char($byte);
        },
        $string
    );
}
?>
|
转自:http://lachy.id.au/dev/2005/11/encoding-functions-source
收 藏
成长的对话版权声明:以上内容作者已申请原创保护,未经允许不得转载,侵权必究!授权事宜、对本内容有异议或投诉,敬请联系网站管理员,我们将尽快回复您,谢谢合作!