4:27
  	   ˆÀÅ0¿Ã  /   ˆl–Ðz¾”êC]ŠeA
à_¸ÔÅ£ÆƒyöÛ_‹uÐb&=LV[(ÁÅ "û    package Encode::Unicode;

use strict;
use warnings;
no warnings 'redefine';

# Derive the module version from the revision keyword: the digit groups
# found in it (2 and 7 here) are formatted as "%d." followed by one "%02d"
# per remaining group, which yields "2.07".
our $VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

use XSLoader;
# Load the XS part of this package, passing the version computed above.
XSLoader::load( __PACKAGE__, $VERSION );

#
# Object generator: builds all 8 transcoders at once!
#

require Encode;

# Encodings whose name carries no BE/LE suffix.  renew() returns a fresh
# clone for these instead of the shared registered object.
our %BOM_Unknown = map { $_ => 1 } qw(UTF-16 UTF-32);

# Build one encoding object per name and register it in %Encode::Encoding.
# Each object records:
#   Name   - the encoding name itself
#   size   - 2 for UCS-* names, otherwise the bit width in the name / 8
#   endian - 'n' (BE) or 'v' (LE) when size is 2, 'N' or 'V' when size is 4,
#            and '' when the name has no BE/LE suffix
#   ucs2   - true only for UCS-* names
for my $name (
    qw(UTF-16 UTF-16BE UTF-16LE
    UTF-32 UTF-32BE UTF-32LE
    UCS-2BE  UCS-2LE)
  )
{
    # Split e.g. "UTF-16BE" into ("UTF", 16, "BE").  Capturing into named
    # lexicals (and checking the match) avoids reading stale $1..$3 values
    # left over from an earlier successful match.
    my ( $family, $bits, $order ) = $name =~ /^(\w+)-(\d+)(\w*)$/
      or die "Malformed encoding name '$name'";

    my $ucs2 = ( $family eq 'UCS' );
    my $size = $ucs2 ? 2 : $bits / 8;

    # Lower-case letters for 2-octet units, upper-case for 4-octet units.
    my $endian = ( $order eq 'BE' ) ? 'n' : ( $order eq 'LE' ) ? 'v' : '';
    $size == 4 and $endian = uc($endian);

    $Encode::Encoding{$name} = bless {
        Name   => $name,
        size   => $size,
        endian => $endian,
        ucs2   => $ucs2,
    } => __PACKAGE__;
}

use base qw(Encode::Encoding);

# Return an object suitable for a new conversion.
# Encodings not listed in %BOM_Unknown are returned as-is; for the others
# a shallow copy blessed into the same class is returned, with its
# {renewed} counter incremented so the caller can tell it is a renewed copy.
sub renew {
    my ($self) = @_;
    return $self unless $BOM_Unknown{ $self->name };

    my %copy = %{$self};
    $copy{renewed}++;    # marks the copy as renewed for the caller
    return bless \%copy, ref $self;
}

# There used to be a pure-Perl implementation of encode() and decode(),
# but now that the XS version is mature the Perl version has been removed
# for optimal speed.

# Install decode/encode as aliases of decode_xs/encode_xs.
*decode = \&decode_xs;
*encode = \&encode_xs;

1;
__END__

=head1 NAME

Encode::Unicode -- Various Unicode Transformation Formats

=cut

=head1 SYNOPSIS

    use Encode qw/encode decode/;
    $ucs2 = encode("UCS-2BE", $utf8);
    $utf8 = decode("UCS-2BE", $ucs2);

=head1 ABSTRACT

This module implements all Character Encoding Schemes of Unicode that
are officially documented by the Unicode Consortium (except, of course,
for UTF-8, which is a native format in perl).

=over 4

=item L<http://www.unicode.org/glossary/> says:

I<Character Encoding Scheme> A character encoding form plus byte
serialization. There are seven character encoding schemes in Unicode:
UTF-8, UTF-16, UTF-16BE, UTF-16LE, UTF-32 (UCS-4), UTF-32BE (UCS-4BE) and
UTF-32LE (UCS-4LE), and UTF-7.

Since UTF-7 is a 7-bit (re)encoded version of UTF-16BE, it is not part of
Unicode's Character Encoding Scheme.  It is separately implemented in
Encode::Unicode::UTF7.  For details see L<Encode::Unicode::UTF7>.

=item Quick Reference

                Decodes from ord(N)           Encodes chr(N) to...
       octet/char BOM S.P d800-dfff  ord > 0xffff     \x{1abcd} ==
  ---------------+-----------------+------------------------------
  UCS-2BE       2   N   N  is bogus                  Not Available
  UCS-2LE       2   N   N     bogus                  Not Available
  UTF-16      2/4   Y   Y  is   S.P           S.P            BE/LE
  UTF-16BE    2/4   N   Y       S.P           S.P    0xd82a,0xdfcd
  UTF-16LE    2/4   N   Y       S.P           S.P    0x2ad8,0xcddf
  UTF-32        4   Y   -  is bogus         As is            BE/LE
  UTF-32BE      4   N   -     bogus         As is       0x0001abcd
  UTF-32LE      4   N   -     bogus         As is       0xcdab0100
  UTF-8       1-4   -   -     bogus   >= 4 octets \xf0\x9a\xaf\x8d
  ---------------+-----------------+------------------------------

=back

=head1 Size, Endianness, and BOM

You can categorize these CES by 3 criteria:  size of each character,
endianness, and Byte Order Mark.

=head2 by size

UCS-2 is a fixed-length encoding with each character taking 16 bits.
It B<does not> support I<surrogate pairs>.  When a surrogate pair
is encountered during decode(), its place is filled with \x{FFFD}
if I<CHECK> is 0, or the routine croaks if I<CHECK> is 1.  When a
character whose ord value is larger than 0xFFFF is encountered
during encode(), its place is filled with \x{FFFD} if I<CHECK> is 0,
or the routine croaks if I<CHECK> is 1.

UTF-16 is almost the same as UCS-2 but it supports I<surrogate pairs>.
When it encounters a high surrogate (0xD800-0xDBFF), it fetches the
following low surrogate (0xDC00-0xDFFF) and C<desurrogate>s them to
form a character.  Bogus surrogates result in death.  When