|
|
96a9a5 |
--- Unicode-String-2.09/README 2005-10-25 13:56:28.000000000 +0100
|
|
|
96a9a5 |
+++ Unicode-String-2.09/README.utf8 2010-02-18 09:11:45.235669975 +0000
|
|
|
96a9a5 |
@@ -18,8 +18,8 @@
|
|
|
96a9a5 |
o Depreciation because of perl's own utf8 support.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
o Composition/decomposition support:
|
|
|
96a9a5 |
- $u->decomp; # will decomposite as much as possible: "å" --> "a°"
|
|
|
96a9a5 |
- $u->comp; # will composite as much as possible: "a°" --> "å"
|
|
|
96a9a5 |
+ $u->decomp; # will decomposite as much as possible: "å" --> "a°"
|
|
|
96a9a5 |
+ $u->comp; # will composite as much as possible: "a°" --> "å"
|
|
|
96a9a5 |
|
|
|
96a9a5 |
Need separate routines or a special argument to distinguish
|
|
|
96a9a5 |
between compatibility decomposition and canonical decomposition.
|
|
|
96a9a5 |
@@ -64,7 +64,7 @@
|
|
|
96a9a5 |
print $u->latin1;
|
|
|
96a9a5 |
print $u->hex;
|
|
|
96a9a5 |
|
|
|
96a9a5 |
- print latin1("naïve\n")->utf8;
|
|
|
96a9a5 |
+ print utf8("naïve\n")->latin1;
|
|
|
96a9a5 |
|
|
|
96a9a5 |
use Unicode::CharName qw(uname);
|
|
|
96a9a5 |
print uname(ord('$')), "\n";
|
|
|
96a9a5 |
@@ -73,7 +73,7 @@
|
|
|
96a9a5 |
|
|
|
96a9a5 |
COPYRIGHT
|
|
|
96a9a5 |
|
|
|
96a9a5 |
- © 1997-2000,2005 Gisle Aas. All rights reserved.
|
|
|
96a9a5 |
+ © 1997-2000,2005 Gisle Aas. All rights reserved.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
This library is free software; you can redistribute it and/or modify
|
|
|
96a9a5 |
it under the same terms as Perl itself.
|
|
|
96a9a5 |
--- Unicode-String-2.09/String.pm 2005-10-26 09:13:10.000000000 +0100
|
|
|
96a9a5 |
+++ Unicode-String-2.09/String.pm.utf8 2010-02-18 09:11:45.234427359 +0000
|
|
|
96a9a5 |
@@ -597,7 +597,7 @@
|
|
|
96a9a5 |
current value is returned.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
To illustrate the encodings we show how the 2 character sample string
|
|
|
96a9a5 |
-of "µm" (micro meter) is encoded for each one.
|
|
|
96a9a5 |
+of "µm" (micro meter) is encoded for each one.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=over 4
|
|
|
96a9a5 |
|
|
|
96a9a5 |
@@ -606,7 +606,7 @@
|
|
|
96a9a5 |
=item $us->utf32be( $newval )
|
|
|
96a9a5 |
|
|
|
96a9a5 |
The string passed should be in the UTF-32 encoding with bytes in big
|
|
|
96a9a5 |
-endian order. The sample "µm" is "\0\0\0\xB5\0\0\0m" in this encoding.
|
|
|
96a9a5 |
+endian order. The sample "µm" is "\0\0\0\xB5\0\0\0m" in this encoding.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
Alternative names for this method are utf32() and ucs4().
|
|
|
96a9a5 |
|
|
|
96a9a5 |
@@ -615,14 +615,14 @@
|
|
|
96a9a5 |
=item $us->utf32le( $newval )
|
|
|
96a9a5 |
|
|
|
96a9a5 |
The string passed should be in the UTF-32 encoding with bytes in little
|
|
|
96a9a5 |
-endian order. The sample "µm" is is "\xB5\0\0\0m\0\0\0" in this encoding.
|
|
|
96a9a5 |
+endian order. The sample "µm" is "\xB5\0\0\0m\0\0\0" in this encoding.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=item $us->utf16be
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=item $us->utf16be( $newval )
|
|
|
96a9a5 |
|
|
|
96a9a5 |
The string passed should be in the UTF-16 encoding with bytes in big
|
|
|
96a9a5 |
-endian order. The sample "µm" is "\0\xB5\0m" in this encoding.
|
|
|
96a9a5 |
+endian order. The sample "µm" is "\0\xB5\0m" in this encoding.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
Alternative names for this method are utf16() and ucs2().
|
|
|
96a9a5 |
|
|
|
96a9a5 |
@@ -635,7 +635,7 @@
|
|
|
96a9a5 |
=item $us->utf16le( $newval )
|
|
|
96a9a5 |
|
|
|
96a9a5 |
The string passed should be in the UTF-16 encoding with bytes in
|
|
|
96a9a5 |
-little endian order. The sample "µm" is is "\xB5\0m\0" in this
|
|
|
96a9a5 |
+little endian order. The sample "µm" is "\xB5\0m\0" in this
|
|
|
96a9a5 |
encoding. This is the encoding used by the Microsoft Windows API.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
If the string passed to utf16le() starts with the Unicode byte order
|
|
|
96a9a5 |
@@ -646,14 +646,14 @@
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=item $us->utf8( $newval )
|
|
|
96a9a5 |
|
|
|
96a9a5 |
-The string passed should be in the UTF-8 encoding. The sample "µm" is
|
|
|
96a9a5 |
+The string passed should be in the UTF-8 encoding. The sample "µm" is
|
|
|
96a9a5 |
"\xC2\xB5m" in this encoding.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=item $us->utf7
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=item $us->utf7( $newval )
|
|
|
96a9a5 |
|
|
|
96a9a5 |
-The string passed should be in the UTF-7 encoding. The sample "µm" is
|
|
|
96a9a5 |
+The string passed should be in the UTF-7 encoding. The sample "µm" is
|
|
|
96a9a5 |
"+ALU-m" in this encoding.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
|
|
|
96a9a5 |
@@ -673,7 +673,7 @@
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=item $us->latin1( $newval )
|
|
|
96a9a5 |
|
|
|
96a9a5 |
-The string passed should be in the ISO-8859-1 encoding. The sample "µm" is
|
|
|
96a9a5 |
+The string passed should be in the ISO-8859-1 encoding. The sample "µm" is
|
|
|
96a9a5 |
"\xB5m" in this encoding.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
Characters outside the "\x00" .. "\xFF" range are simply removed from
|
|
|
96a9a5 |
@@ -688,7 +688,7 @@
|
|
|
96a9a5 |
The string passed should be plain ASCII where each Unicode character
|
|
|
96a9a5 |
is represented by the "U+XXXX" string and separated by a single space
|
|
|
96a9a5 |
character. The "U+" prefix is optional when setting the value. The
|
|
|
96a9a5 |
-sample "µm" is "U+00b5 U+006d" in this encoding.
|
|
|
96a9a5 |
+sample "µm" is "U+00b5 U+006d" in this encoding.
|
|
|
96a9a5 |
|
|
|
96a9a5 |
=back
|
|
|
96a9a5 |
|