﻿/* ****************************************************************************
 *
 * Copyright (c) Microsoft Corporation. 
 *
 * This source code is subject to terms and conditions of the Apache License, Version 2.0. A 
 * copy of the license can be found in the License.html file at the root of this distribution. If 
 * you cannot locate the  Apache License, Version 2.0, please send an email to 
 * ironruby@microsoft.com. By using this source code in any fashion, you are agreeing to be bound 
 * by the terms of the Apache License, Version 2.0.
 *
 * You must not remove this notice, or any other, from this software.
 *
 *
 * ***************************************************************************/

#if FEATURE_CORE_DLR
using System.Linq.Expressions;
using System.Linq;
#else
using Microsoft.Scripting.Ast;
#endif

using System;
using System.Collections.Generic;
using System.Text;
using System.Diagnostics;
using System.Threading;
using System.Runtime.Serialization;
using Microsoft.Scripting.Utils;
using Microsoft.Scripting.Runtime;
using IronRuby.Runtime;
using IronRuby.Compiler;

namespace IronRuby.Builtins {
    /// <summary>
    /// All encodings in Ruby are represented by instances of a single class. Therefore we need to wrap .NET Encoding class in RubyEncoding.
    /// Instances of this class are app-domain singletons. That's all right as far as the class is readonly and doesn't implement IRubyObject.
    /// Taint, frozen flags and instance variables need to be stored in per-runtime lookaside table.
    /// </summary>
    [Serializable]
    public class RubyEncoding : ISerializable, IExpressionSerializable {
        #region Singletons
        ///<summary>
        /// Snapshot of the encodings reported by <see cref="System.Text.Encoding.GetEncodings"/>, taken once when this type is initialized.
        /// GetEncodingCodePage and GetEncodingName search it to translate between CLR encoding names and code pages.
        /// </summary>
        /// <remarks>http://msdn.microsoft.com/en-us/library/system.text.encoding.aspx</remarks>
        private static EncodingInfo[] g_encodingsInfo = Encoding.GetEncodings();

        // Maps an encoding name on the CLR side (key) to the name Ruby uses for it (value).
        // GetCLREncodingName searches this map by value; GetRubyEncodingName searches it by key.
        // NOTE(review): keys presumably match EncodingInfo.Name exactly (including case) - confirm.
        private static readonly IDictionary<string, string> g_encodingsNameMap = new ReadOnlyDictionary<string, string>(new SortedDictionary<string, string> {
            { "utf-8", "UTF-8" },
            { "us-ascii", "US-ASCII" },
            { "unicodeFFFE", "UTF-16BE"},
            { "utf-16", "UTF-16LE"},
            { "utf-7", "UTF-7"},
            { "utf-32BE", "UTF-32BE"},
            { "utf-32", "UTF-32LE"},
            { "shift_jis", "Shift_JIS"},
            { "euc-jp", "EUC-JP"},
            //{ "EUC-JP", "CP20932"}, // MEMO: treated in GetCLREncodingName
            { "iso-2022-jp", "ISO-2022-JP"},
            // { "iso-2022-jp", "CP50222"}, // MEMO: treated in GetCLREncodingName, not treated in GetRubyEncodingName :(
        });

        /// <summary>
        /// Translates a Ruby encoding name into the corresponding CLR encoding name.
        /// </summary>
        /// <remarks>
        /// Resolution order:
        /// 1. a reverse lookup (by value) in g_encodingsNameMap;
        /// 2. names of the form "CP" followed by a number are resolved through the code page number;
        /// 3. otherwise the name is treated as a CLR name and validated against the known encodings.
        /// Steps 2 and 3 throw the Ruby argument error produced by GetEncodingName / GetEncodingCodePage
        /// when the code page or name is unknown. A null name is reported the same way (as an unknown name)
        /// rather than failing with a NullReferenceException.
        /// </remarks>
        private static string GetCLREncodingName(string rubyEncodingName) {
            string clrEncodingName = g_encodingsNameMap.SingleOrDefault(mapClrRuby => mapClrRuby.Value == rubyEncodingName).Key;

            // The "CP" prefix is a fixed identifier, so compare it ordinally instead of using the current culture.
            if (string.IsNullOrEmpty(clrEncodingName)
                && rubyEncodingName != null
                && rubyEncodingName.StartsWith("CP", StringComparison.Ordinal)) {
                int codePage;
                if (Int32.TryParse(rubyEncodingName.Substring(2), out codePage)) {
                    clrEncodingName = GetEncodingName(codePage);
                }
            }

            if (string.IsNullOrEmpty(clrEncodingName)) {
                // MEMO: ensure encoding exists.
                clrEncodingName = GetEncodingName(GetEncodingCodePage(rubyEncodingName));
            }
            return clrEncodingName;
        }

        /// <summary>
        /// Translates a CLR encoding name into the name Ruby uses for it.
        /// </summary>
        /// <returns>
        /// The mapped Ruby name, or <paramref name="clrEncodingName"/> itself when g_encodingsNameMap has no entry for it.
        /// </returns>
        private static string GetRubyEncodingName(string clrEncodingName) {
            KeyValuePair<string, string> entry = g_encodingsNameMap.SingleOrDefault(candidate => candidate.Key == clrEncodingName);

            // When nothing matches, SingleOrDefault yields default(KeyValuePair), whose Value is null.
            return entry.Value ?? clrEncodingName;
        }

        /// <summary>
        /// Returns the code page of the first known encoding whose CLR name equals <paramref name="name"/> (exact, case-sensitive match).
        /// </summary>
        /// <exception>A Ruby argument error (RubyExceptions.CreateArgumentError) when no encoding has that name.</exception>
        public static int GetEncodingCodePage(string name) {
            EncodingInfo match = g_encodingsInfo.FirstOrDefault(candidate => candidate.Name == name);
            if (match != null) {
                return match.CodePage;
            }

            throw RubyExceptions.CreateArgumentError("Unknown encoding name: {0}", name);
        }

        /// <summary>
        /// Returns the CLR name of the encoding with the given code page.
        /// Code page 0 is answered with "ASCII-8BIT" without consulting the list of known encodings.
        /// </summary>
        /// <exception>A Ruby argument error (RubyExceptions.CreateArgumentError) when the code page is unknown.</exception>
        public static string GetEncodingName(int codePage) {
            if (codePage != 0) {
                EncodingInfo match = g_encodingsInfo.SingleOrDefault(candidate => candidate.CodePage == codePage);
                if (match != null) {
                    return match.Name;
                }

                throw RubyExceptions.CreateArgumentError("Unknown encoding code page: {0}", codePage);
            }

            return "ASCII-8BIT";
        }

        // Code page used for the Ruby-specific binary encoding; GetEncodingName reports it as "ASCII-8BIT".
        public const int CodePageBinary = 0;
        public const int CodePageSJIS = 932;
        public const int CodePageBig5 = 950;
        public const int CodePageAscii = 20127;

        // Windows returns 2 EUC-JP encodings (CP 20932 and CP 51932). Mono implements EUC-JP as 51932 and doesn't support 20932.
        public const int CodePageEUCJP = 51932;

        // Code pages of the Unicode transformation formats.
        public const int CodePageUTF7 = 65000;
        public const int CodePageUTF8 = 65001;
        public const int CodePageUTF16BE = 1201;
        public const int CodePageUTF16LE = 1200;
        public const int CodePageUTF32BE = 12001;
        public const int CodePageUTF32LE = 12000;

        // TODO: how does MRI sort encodings?

        // Well-known encodings, created once per app-domain. The last constructor argument is the ordinal,
        // which GetHashCode returns and CompareTo orders by; the singletons use -4..0 while encodings created
        // on demand in GetRubyEncoding(int) use their code page as ordinal.
        public static readonly RubyEncoding/*!*/ Binary = new RubyEncoding(BinaryEncoding.Instance, BinaryEncoding.Instance, -4);
        public static readonly RubyEncoding/*!*/ UTF8 = new RubyEncoding(CreateEncoding(CodePageUTF8, false), CreateEncoding(CodePageUTF8, true), -3);

#if FEATURE_ENCODING
        public static readonly RubyEncoding/*!*/ Ascii = new RubyEncoding(CreateEncoding(CodePageAscii, false), CreateEncoding(CodePageAscii, true), -2);
        public static readonly RubyEncoding/*!*/ EUCJP = new RubyEncoding(CreateEncoding(CodePageEUCJP, false), CreateEncoding(CodePageEUCJP, true), -1);
        public static readonly RubyEncoding/*!*/ SJIS = new RubyEncoding(CreateEncoding(CodePageSJIS, false), CreateEncoding(CodePageSJIS, true), 0);
#else
        // Without FEATURE_ENCODING the same AsciiEncoding instance serves as both the lenient and the strict encoding.
        public static readonly RubyEncoding/*!*/ Ascii = new RubyEncoding(AsciiEncoding.Instance, AsciiEncoding.Instance, -2);
#endif
        #endregion

        // Lenient encoding (exposed via Encoding) and its strict counterpart (exposed via StrictEncoding).
        private readonly Encoding/*!*/ _encoding;
        private readonly Encoding/*!*/ _strictEncoding;
        // Lazily created constant expression wrapping this instance (see the Expression property).
        private Expression _expression;
        // Value returned by GetHashCode and compared by CompareTo.
        private readonly int _ordinal;

        // TODO: combine into a single integer (tables could be merged)
        // Both values are computed once in the constructor.
        private readonly int _maxBytesPerChar;
        private readonly bool _isAsciiIdentity;
#if FEATURE_ENCODING
        // Lazily computed answers for IsSingleByteCharacterSet / IsDoubleByteCharacterSet; null until first queried.
        private bool? _isSingleByteCharacterSet;
        private bool? _isDoubleByteCharacterSet;
#endif

        /// <summary>
        /// Wraps a pair of CLR encodings (lenient and strict) and precomputes the properties derived from them.
        /// </summary>
        /// <param name="encoding">Lenient encoding; also used to determine ASCII identity.</param>
        /// <param name="strictEncoding">Strict encoding; also used to determine the maximum number of bytes per character.</param>
        /// <param name="ordinal">Value used for hashing and ordering.</param>
        private RubyEncoding(Encoding/*!*/ encoding, Encoding/*!*/ strictEncoding, int ordinal) {
            Assert.NotNull(encoding, strictEncoding);

            _encoding = encoding;
            _strictEncoding = strictEncoding;
            _ordinal = ordinal;

            // Derived, immutable characteristics:
            _maxBytesPerChar = strictEncoding.GetMaxByteCount(1);
            _isAsciiIdentity = AsciiIdentity(encoding);
        }

        /// <summary>
        /// Default constructor (ASCII): builds a fresh lenient/strict ASCII encoding pair with ordinal -2.
        /// MEMO: exists mainly for Marshal.load.
        /// </summary>
        /// <remarks>
        /// NOTE(review): in builds without FEATURE_ENCODING, CreateEncoding throws InvalidOperationException for
        /// CodePageAscii, so this constructor looks unusable in that configuration - confirm.
        /// </remarks>
        protected RubyEncoding()
            : this(CreateEncoding(CodePageAscii, false), CreateEncoding(CodePageAscii, true), -2) {
        }

        /// <summary>
        /// Returns the ordinal assigned at construction as the hash code.
        /// </summary>
        public override int GetHashCode() {
            return _ordinal;
        }

        /// <summary>
        /// Constant expression wrapping this encoding; created on first access and reused afterwards.
        /// </summary>
        internal Expression/*!*/ Expression {
            get {
                if (_expression == null) {
                    _expression = Expression.Constant(this);
                }
                return _expression;
            }
        }

        /// <summary>
        /// Result of AsciiIdentity for the lenient encoding, computed once in the constructor.
        /// </summary>
        public bool IsAsciiIdentity {
            get { return _isAsciiIdentity; }
        }

        /// <summary>
        /// Creates a CLR encoding for the given code page.
        /// </summary>
        /// <param name="codepage">Code page of the encoding to create.</param>
        /// <param name="throwOnError">
        /// True for a strict encoding that throws on invalid data; false for a lenient one
        /// (replacement fallback for encoding, BinaryDecoderFallback for decoding when FEATURE_ENCODING is defined).
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// Without FEATURE_ENCODING: the code page is not UTF-8, UTF-16BE or UTF-16LE.
        /// </exception>
        private static Encoding/*!*/ CreateEncoding(int codepage, bool throwOnError) {
#if FEATURE_ENCODING
            if (!throwOnError) {
                return Encoding.GetEncoding(codepage, EncoderFallback.ReplacementFallback, BinaryDecoderFallback.Instance);
            }

            return Encoding.GetEncoding(codepage, EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);
#else
            if (codepage == CodePageUTF8) {
                return new UTF8Encoding(false, throwOnError);
            }

            if (codepage == CodePageUTF16BE) {
                return new UnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: throwOnError);
            }

            if (codepage == CodePageUTF16LE) {
                return new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: throwOnError);
            }

            // ASCII, binary and every other code page cannot be created here.
            throw new InvalidOperationException();
#endif
        }

        #region Serialization
#if FEATURE_SERIALIZATION
        // Deserialization constructor. Never expected to run: GetObjectData redirects deserialization to the
        // Deserializer type, so reaching this constructor is treated as unreachable.
        private RubyEncoding(SerializationInfo/*!*/ info, StreamingContext context) {
            throw Assert.Unreachable;
        }

        /// <summary>
        /// Stand-in type written to the serialization stream instead of RubyEncoding itself.
        /// On deserialization it reads the stored code page and resolves to the RubyEncoding
        /// returned by GetRubyEncoding for that code page.
        /// </summary>
        [Serializable]
        internal sealed class Deserializer : ISerializable, IObjectReference {
            private readonly int _codePage;

            // Reads the "CodePage" value stored by RubyEncoding's ISerializable.GetObjectData.
            private Deserializer(SerializationInfo/*!*/ info, StreamingContext context) {
                _codePage = info.GetInt32("CodePage");
            }

            // Replaces the deserialized stand-in with the RubyEncoding for the stored code page.
            public object GetRealObject(StreamingContext context) {
                return GetRubyEncoding(_codePage);
            }

            // This type is only ever read from a stream, never written.
            void ISerializable.GetObjectData(SerializationInfo/*!*/ info, StreamingContext context) {
                throw Assert.Unreachable;
            }
        }

        // Serializes only the code page and switches the serialized type to Deserializer,
        // which maps the code page back to a RubyEncoding instance on load.
        void ISerializable.GetObjectData(SerializationInfo/*!*/ info, StreamingContext context) {
            info.AddValue("CodePage", CodePage);
            info.SetType(typeof(Deserializer));
        }
#endif
        #endregion

        /// <summary>
        /// Value of <c>GetMaxByteCount(1)</c> of the strict encoding, captured at construction.
        /// </summary>
        public int MaxBytesPerChar {
            get { return _maxBytesPerChar; }
        }

        /// <summary>
        /// The lenient CLR encoding passed as the first constructor argument.
        /// </summary>
        public Encoding/*!*/ Encoding {
            get { return _encoding; }
        }

        /// <summary>
        /// The strict CLR encoding passed as the second constructor argument.
        /// </summary>
        public Encoding/*!*/ StrictEncoding {
            get { return _strictEncoding; }
        }

        /// <summary>
        /// Name as displayed by MRI.
        /// </summary>
        /// <remarks>
        /// Resolved from the code page on every access; the web name of the CLR encoding is the fallback
        /// when no Ruby-specific name is returned.
        /// NOTE(review): GetRubySpecificName as written either returns a non-null name or throws, so the
        /// fallback looks unreachable - confirm.
        /// </remarks>
        public string/*!*/ Name {
            get {
                string rubyName = GetRubySpecificName(CodePage);
                if (rubyName != null) {
                    return rubyName;
                }

                return _encoding.WebName;
            }
        }

        /// <summary>
        /// Returns the Ruby name of the encoding with the given code page: the CLR name is looked up first
        /// (GetEncodingName) and then translated to its Ruby spelling (GetRubyEncodingName).
        /// </summary>
        public static string GetRubySpecificName(int codepage) {
            string clrName = GetEncodingName(codepage);
            return GetRubyEncodingName(clrName);
        }

        /// <summary>
        /// Returns the code page for a Ruby encoding name: the name is translated to its CLR spelling
        /// (GetCLREncodingName) and then resolved to a code page (GetEncodingCodePage).
        /// </summary>
        public static int GetCodePage(string rubyEncodingName) {
            string clrName = GetCLREncodingName(rubyEncodingName);
            return GetEncodingCodePage(clrName);
        }

        /// <summary>
        /// Code page of the lenient encoding, as determined by GetCodePage(Encoding).
        /// </summary>
        public int CodePage {
            get { return GetCodePage(_encoding); }
        }

        /// <summary>
        /// Returns <see cref="Name"/>.
        /// </summary>
        public override string/*!*/ ToString() {
            return Name;
        }

        /// <summary>
        /// Orders encodings by ordinal: the result is the difference between this ordinal and the other one
        /// (negative, zero or positive), not a normalized -1/0/1.
        /// </summary>
        public int CompareTo(RubyEncoding/*!*/ other) {
            return _ordinal - other._ordinal;
        }

        /// <summary>
        /// Maps an encoding to the regex option flag that selects it.
        /// </summary>
        /// <returns>
        /// NONE for null, FIXED for the binary encoding, otherwise the flag matching the code page
        /// (SJIS and EUC only when FEATURE_ENCODING is defined, UTF8 always).
        /// </returns>
        /// <remarks>Any other encoding is considered unreachable and raises Assert.Unreachable.</remarks>
        public static RubyRegexOptions ToRegexOption(RubyEncoding encoding) {
            if (encoding == null) {
                return RubyRegexOptions.NONE;
            }

            if (encoding == RubyEncoding.Binary) {
                return RubyRegexOptions.FIXED;
            }

            int codePage = encoding.CodePage;
#if FEATURE_ENCODING
            if (codePage == RubyEncoding.CodePageSJIS) {
                return RubyRegexOptions.SJIS;
            }

            if (codePage == RubyEncoding.CodePageEUCJP) {
                return RubyRegexOptions.EUC;
            }
#endif
            if (codePage == RubyEncoding.CodePageUTF8) {
                return RubyRegexOptions.UTF8;
            }

            throw Assert.Unreachable;
        }

        /// <summary>
        /// Maps the encoding bits of a regex option set (options masked with EncodingMask) to an encoding.
        /// </summary>
        /// <returns>
        /// The matching encoding (EUC and SJIS only when FEATURE_ENCODING is defined), or null when the
        /// masked value matches none of the known flags.
        /// </returns>
        public static RubyEncoding GetRegexEncoding(RubyRegexOptions options) {
            RubyRegexOptions encodingBits = options & RubyRegexOptions.EncodingMask;
#if FEATURE_ENCODING
            if (encodingBits == RubyRegexOptions.EUC) {
                return RubyEncoding.EUCJP;
            }

            if (encodingBits == RubyRegexOptions.SJIS) {
                return RubyEncoding.SJIS;
            }
#endif
            if (encodingBits == RubyRegexOptions.UTF8) {
                return RubyEncoding.UTF8;
            }

            if (encodingBits == RubyRegexOptions.FIXED) {
                return RubyEncoding.Binary;
            }

            return null;
        }

        /// <summary>
        /// Maps the first letter of an encoding name (either case) to a code page:
        /// 'U' to UTF-8 and, when FEATURE_ENCODING is defined, 'E' to EUC-JP and 'S' to Shift-JIS.
        /// </summary>
        /// <returns>The code page, or -1 for any other character.</returns>
        internal static int GetCodePage(int nameInitial) {
#if FEATURE_ENCODING
            if (nameInitial == 'E' || nameInitial == 'e') {
                return CodePageEUCJP;
            }

            if (nameInitial == 'S' || nameInitial == 's') {
                return CodePageSJIS;
            }
#endif
            if (nameInitial == 'U' || nameInitial == 'u') {
                return CodePageUTF8;
            }

            return -1;
        }

        /// <summary>
        /// Returns the encoding selected by the first letter of its name (see GetCodePage(int)),
        /// or null when the letter does not map to a positive code page.
        /// </summary>
        public static RubyEncoding GetEncodingByNameInitial(int initial) {
            int codepage = GetCodePage(initial);
            if (codepage > 0) {
                return GetRubyEncoding(codepage);
            }

            return null;
        }

        /// <summary>
        /// Ensures this encoding is an ASCII identity (see IsAsciiIdentity).
        /// </summary>
        /// <exception cref="NotSupportedException">The encoding is not an ASCII identity.</exception>
        public void RequireAsciiIdentity() {
            if (_isAsciiIdentity) {
                return;
            }

            string message = String.Format("Encoding {0} (code page {1}) is not supported", Name, CodePage);
            throw new NotSupportedException(message);
        }

        // Backing field for Aliases; null until the property is first read.
        private static ReadOnlyDictionary<string, string> _aliases;

        /// <summary>
        /// Alias name to canonical Ruby encoding name. The table is built by CreateAliases on first access and cached.
        /// </summary>
        public static ReadOnlyDictionary<string, string> Aliases {
            get {
                if (_aliases == null) {
                    _aliases = CreateAliases();
                }
                return _aliases;
            }
        }

        /// <summary>
        /// Builds the alias table: each key is an alias and each value the encoding name it stands for.
        /// Keys are compared case-insensitively (StringComparer.OrdinalIgnoreCase). Only the first eight
        /// entries are available when FEATURE_ENCODING is not defined.
        /// </summary>
        private static ReadOnlyDictionary<string, string> CreateAliases() {
            return new ReadOnlyDictionary<string, string>(new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase) {
                { "646", "US-ASCII" }, 
                { "ASCII", "US-ASCII" }, 
                { "ANSI_X3.4-1968", "US-ASCII" }, 
                { "BINARY", "ASCII-8BIT" }, 
                { "CP65001", "UTF-8" }, 
                { "UCS-2BE", "UTF-16BE" }, 
                { "UCS-4BE", "UTF-32BE" }, 
                { "UCS-4LE", "UTF-32LE" },  
#if FEATURE_ENCODING
                { "CP437", "IBM437" }, 
                { "CP737", "IBM737" }, 
                { "CP775", "IBM775" }, 
                { "CP857", "IBM857" }, 
                { "CP860", "IBM860" }, 
                { "CP861", "IBM861" }, 
                { "CP862", "IBM862" }, 
                { "CP863", "IBM863" }, 
                { "CP864", "IBM864" },
                { "CP865", "IBM865" }, 
                { "CP866", "IBM866" }, 
                { "CP869", "IBM869" }, 
                { "CP874", "Windows-874" }, 
                { "CP878", "KOI8-R" }, 
                { "CP932", "Windows-31J" }, 
                { "CP936", "GBK" }, 
                { "CP950", "Big5" }, 
                { "CP951", "Big5-HKSCS" }, 
                { "CP1258", "Windows-1258" },
                { "CP1252", "Windows-1252" }, 
                { "CP1250", "Windows-1250" }, 
                { "CP1256", "Windows-1256" }, 
                { "CP1251", "Windows-1251" },
                { "CP1253", "Windows-1253" }, 
                { "CP1255", "Windows-1255" }, 
                { "CP1254", "Windows-1254" }, 
                { "CP1257", "Windows-1257" }, 
                { "CP65000", "UTF-7" }, 
                // Here the "CP" spelling is the target name and the "IBM" spelling is the alias.
                { "IBM850", "CP850" }, 
                { "eucJP", "EUC-JP" }, 
                { "eucKR", "EUC-KR" }, 
                // { "eucTW", "EUC-TW" }, 
                { "ISO2022-JP", "ISO-2022-JP" }, 
                // { "ISO2022-JP2", "ISO-2022-JP-2" }, 
                { "ISO8859-1", "ISO-8859-1" }, 
                { "ISO8859-2", "ISO-8859-2" }, 
                { "ISO8859-3", "ISO-8859-3" }, 
                { "ISO8859-4", "ISO-8859-4" }, 
                { "ISO8859-5", "ISO-8859-5" }, 
                { "ISO8859-6", "ISO-8859-6" }, 
                { "ISO8859-7", "ISO-8859-7" }, 
                { "ISO8859-8", "ISO-8859-8" }, 
                { "ISO8859-9", "ISO-8859-9" }, 
                // { "ISO8859-10", "ISO-8859-10" }, 
                { "ISO8859-11", "ISO-8859-11" }, 
                { "ISO8859-13", "ISO-8859-13" }, 
                // { "ISO8859-14", "ISO-8859-14" }, 
                { "ISO8859-15", "ISO-8859-15" }, 
                // { "ISO8859-16", "ISO-8859-16" }, 
                { "SJIS", "Shift_JIS" }, 
                { "csWindows31J", "Windows-31J" }, 
                // { "MacJapan", "MacJapanese" }, 
                // { "UTF-8-MAC", "UTF8-MAC" }, 
                // { "UTF-8-HFS", "UTF8-MAC" }, 
#endif
            });
        }

        // Cache of RubyEncoding instances created on demand, keyed by code page.
        // Allocated lazily and accessed under a lock in GetRubyEncoding(int).
        private static Dictionary<int, RubyEncoding> _Encodings;


        /// <summary>
        /// Returns the RubyEncoding that corresponds to the code page of the given CLR encoding.
        /// </summary>
        /// <param name="encoding">CLR encoding; must not be null (checked by ContractUtils.RequiresNotNull).</param>
        public static RubyEncoding/*!*/ GetRubyEncoding(Encoding/*!*/ encoding) {
            ContractUtils.RequiresNotNull(encoding, "encoding");

            int codepage = GetCodePage(encoding);
            return GetRubyEncoding(codepage);
        }

        /// <summary>
        /// Returns the RubyEncoding for a code page. The predefined singletons are returned directly;
        /// every other code page gets one instance that is created on first request and cached.
        /// </summary>
        /// <remarks>
        /// Newly created instances use the code page as their ordinal. Cache reads and writes happen under
        /// a lock on the cache dictionary; the dictionary itself is published with Interlocked.CompareExchange.
        /// </remarks>
        public static RubyEncoding/*!*/ GetRubyEncoding(int codepage) {
            if (codepage == CodePageBinary) {
                return Binary;
            }

            if (codepage == CodePageAscii) {
                return Ascii;
            }

            if (codepage == CodePageUTF8) {
                return UTF8;
            }
#if FEATURE_ENCODING
            if (codepage == CodePageSJIS) {
                return SJIS;
            }

            if (codepage == CodePageEUCJP) {
                return EUCJP;
            }
#endif

            if (_Encodings == null) {
                // Only the first thread's dictionary is kept; later attempts leave the field untouched.
                Interlocked.CompareExchange(ref _Encodings, new Dictionary<int, RubyEncoding>(), null);
            }

            lock (_Encodings) {
                RubyEncoding cached;
                if (_Encodings.TryGetValue(codepage, out cached)) {
                    return cached;
                }

                Encoding lenient = CreateEncoding(codepage, false);
                Encoding strict = CreateEncoding(codepage, true);
                RubyEncoding created = new RubyEncoding(lenient, strict, codepage);

                _Encodings.Add(codepage, created);
                return created;
            }
        }

#if FEATURE_ENCODING
        // With FEATURE_ENCODING the code page is read directly from the CLR encoding.
        private static int GetCodePage(Encoding/*!*/ encoding) {
            return encoding.CodePage;
        }

        /// <summary>
        /// Returns Ruby names of all available encodings including their aliases.
        /// </summary>
        /// <remarks>
        /// Order of the sequence: the binary encoding, every encoding reported by Encoding.GetEncodings()
        /// (under its Ruby-specific name), all alias names, and finally the special names
        /// "locale", "external" and "filesystem".
        /// </remarks>
        public static IEnumerable<string> GetEncodingNames() {
            // Ruby specific:
            yield return Binary.Name;

            foreach (EncodingInfo encodingInfo in Encoding.GetEncodings()) {
                string rubyName = GetRubySpecificName(encodingInfo.CodePage);
                yield return rubyName ?? encodingInfo.Name;
            }

            foreach (string aliasName in Aliases.Keys) {
                yield return aliasName;
            }

            yield return "locale";
            yield return "external";
            yield return "filesystem";
        }

        /// <summary>
        /// Returns code pages of all available encodings: the Ruby-specific binary code page first,
        /// followed by the code page of every encoding reported by Encoding.GetEncodings().
        /// </summary>
        public static IEnumerable<int> GetEncodingCodePages() {
            EncodingInfo[] available = Encoding.GetEncodings();

            // Ruby specific:
            yield return CodePageBinary;

            for (int i = 0; i < available.Length; i++) {
                yield return available[i].CodePage;
            }
        }

        /// <summary>
        /// Determines whether the encoding is considered an ASCII identity.
        /// </summary>
        /// <remarks>
        /// The binary encoding instance always qualifies. Code pages listed in the switch are answered with
        /// true without probing the encoding (the probe is only evaluated inside Debug.Assert, as a sanity
        /// check). Any other code page is probed with IsAsciiIdentityFallback.
        /// </remarks>
        public static bool AsciiIdentity(Encoding/*!*/ encoding) {
            if (encoding == BinaryEncoding.Instance) {
                return true;
            }

            switch (encoding.CodePage) {
                case 437: // OEM United States
                case 708: // Arabic (ASMO 708)
                case 720: // Arabic (DOS)
                case 737: // Greek (DOS)
                case 775: // Baltic (DOS)
                case 850: // Western European (DOS)
                case 852: // Central European (DOS)
                case 855: // OEM Cyrillic
                case 857: // Turkish (DOS)
                case 858: // OEM Multilingual Latin I
                case 860: // Portuguese (DOS)
                case 861: // Icelandic (DOS)
                case 862: // Hebrew (DOS)
                case 863: // French Canadian (DOS)
                case 864: // Arabic (864)
                case 865: // Nordic (DOS)
                case 866: // Cyrillic (DOS)
                case 869: // Greek, Modern (DOS)
                case 874: // Thai (Windows)
                case 932: // Japanese (Shift-JIS)
                case 936: // Chinese Simplified (GB2312)
                case 949: // Korean
                case 950: // Chinese Traditional (Big5)
                case 1250: // Central European (Windows)
                case 1251: // Cyrillic (Windows)
                case 1252: // Western European (Windows)
                case 1253: // Greek (Windows)
                case 1254: // Turkish (Windows)
                case 1255: // Hebrew (Windows)
                case 1256: // Arabic (Windows)
                case 1257: // Baltic (Windows)
                case 1258: // Vietnamese (Windows)
                case 1361: // Korean (Johab)
                case 10000: // Western European (Mac)
                case 10001: // Japanese (Mac)
                case 10002: // Chinese Traditional (Mac)
                case 10003: // Korean (Mac)
                case 10004: // Arabic (Mac)
                case 10005: // Hebrew (Mac)
                case 10006: // Greek (Mac)
                case 10007: // Cyrillic (Mac)
                case 10008: // Chinese Simplified (Mac)
                case 10010: // Romanian (Mac)
                case 10017: // Ukrainian (Mac)
                case 10029: // Central European (Mac)
                case 10079: // Icelandic (Mac)
                case 10081: // Turkish (Mac)
                case 10082: // Croatian (Mac)
                case 20000: // Chinese Traditional (CNS)
                case 20001: // TCA Taiwan
                case 20002: // Chinese Traditional (Eten)
                case 20003: // IBM5550 Taiwan
                case 20004: // TeleText Taiwan
                case 20005: // Wang Taiwan
                case 20127: // US-ASCII
                case 20866: // Cyrillic (KOI8-R)
                case 20932: // Japanese (JIS 0208-1990 and 0212-1990)
                case 20936: // Chinese Simplified (GB2312-80)
                case 20949: // Korean Wansung
                case 21866: // Cyrillic (KOI8-U)
                case 28591: // Western European (ISO)
                case 28592: // Central European (ISO)
                case 28593: // Latin 3 (ISO)
                case 28594: // Baltic (ISO)
                case 28595: // Cyrillic (ISO)
                case 28596: // Arabic (ISO)
                case 28597: // Greek (ISO)
                case 28598: // Hebrew (ISO-Visual)
                case 28599: // Turkish (ISO)
                case 28603: // Estonian (ISO)
                case 28605: // Latin 9 (ISO)
                case 38598: // Hebrew (ISO-Logical)
                case 50220: // Japanese (JIS)
                case 50221: // Japanese (JIS-Allow 1 byte Kana)
                case 50222: // Japanese (JIS-Allow 1 byte Kana - SO/SI)
                case 50225: // Korean (ISO)
                case 50227: // Chinese Simplified (ISO-2022)
                case 51932: // Japanese (EUC)
                case 51936: // Chinese Simplified (EUC)
                case 51949: // Korean (EUC)
                case 54936: // Chinese Simplified (GB18030)
                case 57002: // ISCII Devanagari
                case 57003: // ISCII Bengali
                case 57004: // ISCII Tamil
                case 57005: // ISCII Telugu
                case 57006: // ISCII Assamese
                case 57007: // ISCII Oriya
                case 57008: // ISCII Kannada
                case 57009: // ISCII Malayalam
                case 57010: // ISCII Gujarati
                case 57011: // ISCII Punjabi
                case 65001: // Unicode (UTF-8)
                    // Known-good code pages: the probe is only evaluated as a debug-build sanity check.
                    Debug.Assert(IsAsciiIdentityFallback(encoding));
                    return true;

                default:
                    // Unlisted code page: decide by actually encoding the ASCII range.
                    return IsAsciiIdentityFallback(encoding);
            }
        }

        // Cached string holding the characters U+0000..U+007F in order; built on first use.
        private static string _AllAscii;

        /// <summary>
        /// Probes an encoding by encoding all 128 ASCII characters and checking that each one is
        /// encoded as exactly one byte whose value equals the character code.
        /// </summary>
        private static bool IsAsciiIdentityFallback(Encoding/*!*/ encoding) {
            if (_AllAscii == null) {
                // all ASCII characters:
                char[] asciiChars = new char[0x80];
                for (int code = 0; code < asciiChars.Length; code++) {
                    asciiChars[code] = (char)code;
                }
                _AllAscii = new string(asciiChars);
            }

            byte[] encoded = encoding.GetBytes(_AllAscii);
            if (encoded.Length != _AllAscii.Length) {
                return false;
            }

            for (int index = 0; index < encoded.Length; index++) {
                if (encoded[index] != _AllAscii[index]) {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// True when this encoding's code page is listed as a single-byte character set (see IsSBCS).
        /// The answer is computed on first access and cached.
        /// </summary>
        public bool IsSingleByteCharacterSet {
            get {
                bool? cached = _isSingleByteCharacterSet;
                if (cached.HasValue) {
                    return cached.Value;
                }

                bool computed = IsSBCS(CodePage);
                _isSingleByteCharacterSet = computed;
                return computed;
            }
        }

        /// <summary>
        /// True when this encoding's code page is listed as a double-byte character set (see IsDBCS).
        /// The answer is computed on first access and cached.
        /// </summary>
        public bool IsDoubleByteCharacterSet {
            get {
                bool? cached = _isDoubleByteCharacterSet;
                if (cached.HasValue) {
                    return cached.Value;
                }

                bool computed = IsDBCS(CodePage);
                _isDoubleByteCharacterSet = computed;
                return computed;
            }
        }

        // Code pages of single-byte character sets (0 is the Ruby binary encoding).
        // MUST stay sorted in ascending order: IsSBCS looks values up with Array.BinarySearch.
        // 28591 (ISO-8859-1) is a single-byte set like its neighbours 28592-28599 and was missing from the table.
        private static readonly int[] _sbsc = new int[] {
            0, 37, 437, 500, 708, 720, 737, 775, 850, 852, 855, 857, 858, 860, 861, 862, 863, 864, 865, 866, 869, 870, 874, 875, 1026,
            1047, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258,
            10000, 10004, 10005, 10006, 10007, 10010, 10017, 10021, 10029, 10079, 10081, 10082, 20105, 20106, 20107, 20108, 20127,
            20269, 20273, 20277, 20278, 20280, 20284, 20285, 20290, 20297, 20420, 20423, 20424, 20833, 20838, 20866, 20871, 20880,
            20905, 20924, 21025, 21866, 28591, 28592, 28593, 28594, 28595, 28596, 28597, 28598, 28599, 28603, 28605, 29001, 38598
        };

        // Code pages of double-byte character sets.
        // MUST stay sorted in ascending order: IsDBCS looks values up with Array.BinarySearch.
        private static readonly int[] _dbsc = new int[] {
            932, 936, 949, 950, 1361, 10001, 10002, 10003, 10008, 20000, 20001, 20002, 20003, 20004, 20005, 20261, 20932, 20936,
            20949, 50227, 51936, 51949
        };

        /// <summary>
        /// Returns true when <paramref name="codepage"/> is listed as a single-byte character set.
        /// </summary>
        private static bool IsSBCS(int codepage) {
            return Array.BinarySearch(_sbsc, codepage) >= 0;
        }

        /// <summary>
        /// Returns true when <paramref name="codepage"/> is listed as a double-byte character set.
        /// </summary>
        private static bool IsDBCS(int codepage) {
            return Array.BinarySearch(_dbsc, codepage) >= 0;
        }

        /// <summary>
        /// True only for the Ascii and Binary singletons (other encodings are not handled yet, see TODO).
        /// </summary>
        public bool InUnicodeBasicPlane {
            get {
                // TODO: others
                if (this == Ascii) {
                    return true;
                }

                return this == Binary;
            }
        }

        /// <summary>
        /// True when the code page is one of UTF-7, UTF-8, UTF-16 (BE/LE) or UTF-32 (BE/LE).
        /// </summary>
        public bool IsUnicodeEncoding {
            get {
                int codePage = CodePage;
                return codePage == CodePageUTF7
                    || codePage == CodePageUTF8
                    || codePage == CodePageUTF16BE
                    || codePage == CodePageUTF16LE
                    || codePage == CodePageUTF32BE
                    || codePage == CodePageUTF32LE;
            }
        }

        // Emits a call to the CreateEncoding runtime helper (Methods.CreateEncoding) with this encoding's
        // code page as a constant argument.
        Expression/*!*/ IExpressionSerializable.CreateExpression() {
            // TODO: use static fields
            return Methods.CreateEncoding.OpCall(Expression.Constant(CodePage));
        }
#else
        /// <summary>
        /// Without FEATURE_ENCODING only the ASCII, binary and UTF-8 code pages are considered ASCII identities.
        /// </summary>
        public static bool AsciiIdentity(Encoding/*!*/ encoding) {
            int codePage = GetCodePage(encoding);
            return codePage == CodePageAscii
                || codePage == CodePageBinary
                || codePage == CodePageUTF8;
        }

        /// <summary>
        /// Without FEATURE_ENCODING only the Binary singleton is reported as a single-byte character set.
        /// </summary>
        public bool IsSingleByteCharacterSet {
            get {
                return this == Binary;
            }
        }

        /// <summary>
        /// Without FEATURE_ENCODING no encoding is reported as a double-byte character set.
        /// </summary>
        public bool IsDoubleByteCharacterSet {
            get {
                return false;
            }
        }

        /// <summary>
        /// Without FEATURE_ENCODING this is true only for the Binary singleton.
        /// </summary>
        public bool InUnicodeBasicPlane {
            get {
                return this == Binary;
            }
        }

        /// <summary>
        /// Determines the code page of a CLR encoding without reading the encoding's own code page:
        /// the binary and ASCII singletons and UTF8Encoding are recognized by instance/type, everything
        /// else by its upper-cased web name ("UTF-8", "UTF-16", "UTF-16BE").
        /// </summary>
        /// <exception cref="ArgumentException">The encoding is none of the recognized ones.</exception>
        private static int GetCodePage(Encoding/*!*/ encoding) {
            Debug.Assert(encoding != null);

            if (encoding == BinaryEncoding.Instance) {
                return CodePageBinary;
            }

            if (encoding == AsciiEncoding.Instance) {
                return CodePageAscii;
            }

            if (encoding.GetType() == typeof(UTF8Encoding)) {
                return CodePageUTF8;
            }

            string webName = encoding.WebName.ToUpperInvariant();
            if (webName == "UTF-8") {
                return CodePageUTF8;
            }

            if (webName == "UTF-16") {
                return CodePageUTF16LE;
            }

            if (webName == "UTF-16BE") {
                return CodePageUTF16BE;
            }

            throw new ArgumentException(String.Format("Unknown encoding: {0}", encoding));
        }
        
        /// <summary>
        /// Returns Ruby names of all available encodings including their aliases.
        /// </summary>
        /// <remarks>
        /// Without FEATURE_ENCODING the sequence is: binary, UTF-8, UTF-16BE, UTF-16LE, all alias names,
        /// and finally the special names "locale", "external" and "filesystem".
        /// NOTE(review): the ASCII encoding's own name is not yielded although GetEncodingCodePages
        /// reports CodePageAscii - confirm whether that is intended.
        /// </remarks>
        public static IEnumerable<string> GetEncodingNames()
        {
            yield return Binary.Name;
            yield return UTF8.Name;
            yield return GetRubySpecificName(CodePageUTF16BE);
            yield return GetRubySpecificName(CodePageUTF16LE);

            foreach (var alias in Aliases.Keys) {
                yield return alias;
            }

            yield return "locale";
            yield return "external";
            yield return "filesystem";
        }

        /// <summary>
        /// Returns code pages of all available encodings.
        /// Without FEATURE_ENCODING this is the fixed list: binary, ASCII, UTF-8, UTF-16BE, UTF-16LE.
        /// </summary>
        public static IEnumerable<int> GetEncodingCodePages()
        {
            yield return CodePageBinary;
            yield return CodePageAscii;
            yield return CodePageUTF8;
            yield return CodePageUTF16BE;
            yield return CodePageUTF16LE;
        }

        // Without FEATURE_ENCODING every encoding is emitted as a constant referring to the UTF8 singleton,
        // regardless of which encoding this instance represents.
        Expression/*!*/ IExpressionSerializable.CreateExpression() {
            // TODO: use a static fields, deal with KCODEs
            return Expression.Constant(UTF8);
        }
#endif
    }

}
