// EncodingHelper.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace CNCFanucDataReading
{
    /// <summary>
    /// Heuristic helpers that try to turn garbled (mojibake) text back into
    /// readable Chinese by re-decoding its bytes with a list of candidate encodings.
    /// Progress and results are written to <see cref="Console"/> as diagnostics.
    /// </summary>
    public static class EncodingHelper
    {
        /// <summary>
        /// Punctuation treated as "Chinese" by <see cref="IsChineseCodePoint"/>.
        /// Written with Unicode escapes so the fullwidth/CJK forms cannot be confused
        /// with (or silently normalized to) their ASCII look-alikes.
        /// </summary>
        private const string ChinesePunctuation =
            "\uFF0C\u3002\uFF01\uFF1F\uFF1B\uFF1A\uFF08\uFF09" +        // fullwidth , 。 ! ? ; : ( )
            "\u3010\u3011\u300A\u300B\u3001\u300C\u300D\u300E\u300F" +  // 【 】 《 》 、 「 」 『 』
            "\uFF5E\u2026\u2014\u00B7" +                                // fullwidth ~, ellipsis, em dash, middle dot
            "\u201C\u201D\u2018\u2019" +                                // curly double and single quotes
            "\uFFE5\u00A5";                                             // fullwidth yen and yen sign

        /// <summary>
        /// Returns <paramref name="input"/> unchanged when it is null, empty, pure ASCII,
        /// or already looks like valid Chinese text; otherwise attempts to repair it.
        /// </summary>
        /// <param name="input">Text that may be garbled.</param>
        /// <returns>The repaired text, or the original text when no repair succeeded.</returns>
        public static string FixFanucEncoding(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            // Pure ASCII carries no mis-decoded multi-byte data. Sending it through the
            // repair loop can only do harm (e.g. reading ASCII byte pairs as UTF-16
            // yields CJK code points), so it is returned as-is.
            if (IsPureAscii(input))
            {
                return input;
            }

            // Case 1: the text is already readable Chinese.
            if (IsValidChineseText(input))
            {
                return input;
            }

            // Case 2: garbled text, try re-decoding.
            return TryFixEncoding(input);
        }

        /// <summary>
        /// Returns true when every character of <paramref name="text"/> is below U+0080.
        /// </summary>
        private static bool IsPureAscii(string text)
        {
            foreach (char c in text)
            {
                if (c >= 128)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Encodes the garbled text with the system default encoding and re-decodes the
        /// resulting bytes with each candidate encoding in turn. The first result that
        /// passes <see cref="IsValidChineseText"/> is returned (trimmed). If none passes,
        /// a pairwise GBK repair is tried, and finally the original text is returned.
        /// </summary>
        private static string TryFixEncoding(string garbledText)
        {
            Console.WriteLine("=== 开始修复编码 ===");
            Console.WriteLine($"原始文本: {EscapeString(garbledText)}");
            Console.WriteLine($"长度: {garbledText.Length}");

            // Recover bytes using the system default encoding.
            // NOTE(review): this assumes the text was originally mis-decoded with the
            // system default code page; confirm against the caller.
            byte[] originalBytes = Encoding.Default.GetBytes(garbledText);
            Console.WriteLine($"原始字节(Hex): {BitConverter.ToString(originalBytes)}");

            // Candidates are resolved lazily (inside the try below) so that an encoding
            // that is not available on this runtime only skips that one candidate
            // instead of aborting the whole repair.
            Func<string> ByName(string encodingName) =>
                () => Encoding.GetEncoding(encodingName).GetString(originalBytes);
            Func<string> ByCodePage(int codePage) =>
                () => Encoding.GetEncoding(codePage).GetString(originalBytes);
            Func<string> With(Encoding encoding) =>
                () => encoding.GetString(originalBytes);

            var candidates = new List<(string name, Func<string> decode)>
            {
                ("GBK", ByName("GBK")),
                ("GB2312", ByName("GB2312")),
                ("GB18030", ByName("GB18030")),
                ("Shift-JIS", ByName("shift_jis")),
                ("EUC-JP", ByName("euc-jp")),
                ("UTF-8", With(Encoding.UTF8)),
                ("UTF-16", With(Encoding.Unicode)),
                ("UTF-16BE", With(Encoding.BigEndianUnicode)),
                ("Windows-1252", ByCodePage(1252)),
                ("ISO-8859-1", ByCodePage(28591)),

                // Re-encode as Big5, then decode as GBK.
                ("Big5 -> GBK", () => ConvertBig5ToGb(garbledText)),
            };

            foreach (var (name, decode) in candidates)
            {
                try
                {
                    string result = decode();
                    if (string.IsNullOrEmpty(result))
                    {
                        continue;
                    }

                    if (IsValidChineseText(result))
                    {
                        Console.WriteLine($"✅ 成功使用 {name} 修复: {result}");
                        return result.Trim();
                    }

                    Console.WriteLine($"❌ {name}: {result}");
                }
                catch (Exception ex) when (ex is ArgumentException || ex is NotSupportedException)
                {
                    // Encoding not available or bytes rejected: log and try the next one.
                    Console.WriteLine($"❌ {name} 错误: {ex.Message}");
                }
            }

            // Last resort: pairwise GBK repair.
            string doubleByteFixed = TryDoubleByteFix(garbledText);
            if (IsValidChineseText(doubleByteFixed))
            {
                Console.WriteLine($"✅ 双字节修复成功: {doubleByteFixed}");
                return doubleByteFixed;
            }

            Console.WriteLine("❌ 所有编码尝试都失败");
            return garbledText; // fall back to the original text
        }

        /// <summary>
        /// Walks the text and, wherever two adjacent characters both fit in a single byte
        /// and the first is non-ASCII, decodes that byte pair as GBK. The pair is replaced
        /// only when it decodes to exactly one Chinese character; everything else is
        /// copied through unchanged.
        /// </summary>
        /// <returns>The repaired text, or <paramref name="text"/> itself when GBK is unavailable.</returns>
        private static string TryDoubleByteFix(string text)
        {
            Encoding gbk;
            try
            {
                // Resolved once instead of once per character.
                gbk = Encoding.GetEncoding("GBK");
            }
            catch (Exception ex) when (ex is ArgumentException || ex is NotSupportedException)
            {
                // Without GBK no pair can be decoded, so the text stays as it is.
                return text;
            }

            var sb = new StringBuilder(text.Length);
            var pair = new byte[2];

            for (int i = 0; i < text.Length; i++)
            {
                char lead = text[i];

                // Only characters that fit in one byte can stand for a raw GBK byte;
                // casting a wider char to byte would silently truncate it.
                bool canPair = lead >= 128 && lead <= 0xFF
                               && i + 1 < text.Length && text[i + 1] <= 0xFF;

                if (canPair)
                {
                    pair[0] = (byte)lead;
                    pair[1] = (byte)text[i + 1];

                    string decoded = gbk.GetString(pair);
                    if (decoded.Length == 1 && IsChineseChar(decoded[0]))
                    {
                        sb.Append(decoded);
                        i++; // the trail byte has been consumed
                        continue;
                    }
                }

                sb.Append(lead);
            }

            return sb.ToString();
        }

        /// <summary>
        /// Heuristic check: more than 80% of the code points must be valid, and the text
        /// must contain some Chinese (more than 10% of code points, or more than two).
        /// Control characters other than newline, carriage return and tab, the Unicode
        /// replacement character U+FFFD, and unpaired surrogates count as invalid.
        /// </summary>
        private static bool IsValidChineseText(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return false;
            }

            int chineseCount = 0;
            int validCount = 0;
            int totalCount = 0;

            for (int i = 0; i < text.Length; i++)
            {
                int codePoint;
                bool invalid;

                if (char.IsSurrogatePair(text, i))
                {
                    // Supplementary-plane character: consume both UTF-16 code units.
                    codePoint = char.ConvertToUtf32(text, i);
                    invalid = false;
                    i++;
                }
                else
                {
                    char c = text[i];
                    codePoint = c;
                    invalid = c == '\uFFFD'
                              || char.IsSurrogate(c)
                              || (char.IsControl(c) && c != '\n' && c != '\r' && c != '\t');
                }

                totalCount++;

                if (invalid)
                {
                    continue;
                }

                validCount++;
                if (IsChineseCodePoint(codePoint))
                {
                    chineseCount++;
                }
            }

            float validRatio = (float)validCount / totalCount;
            float chineseRatio = (float)chineseCount / totalCount;

            return validRatio > 0.8f && (chineseRatio > 0.1f || chineseCount > 2);
        }

        /// <summary>
        /// Returns true when the single UTF-16 code unit <paramref name="c"/> is a CJK
        /// ideograph or one of the listed Chinese punctuation marks.
        /// </summary>
        private static bool IsChineseChar(char c)
        {
            return IsChineseCodePoint(c);
        }

        /// <summary>
        /// Returns true when <paramref name="codePoint"/> lies in CJK Unified Ideographs,
        /// Extension A or Extension B, or is one of <see cref="ChinesePunctuation"/>.
        /// </summary>
        private static bool IsChineseCodePoint(int codePoint)
        {
            // CJK Unified Ideographs
            if (codePoint >= 0x4E00 && codePoint <= 0x9FFF)
            {
                return true;
            }

            // CJK Extension A
            if (codePoint >= 0x3400 && codePoint <= 0x4DBF)
            {
                return true;
            }

            // CJK Extension B (only reachable with a full code point, not a single char)
            if (codePoint >= 0x20000 && codePoint <= 0x2A6DF)
            {
                return true;
            }

            // Common Chinese punctuation (all in the Basic Multilingual Plane)
            return codePoint <= 0xFFFF && ChinesePunctuation.IndexOf((char)codePoint) >= 0;
        }

        /// <summary>
        /// Makes control characters visible for logging: newline, carriage return, tab
        /// and NUL become backslash escapes, other control characters become \uXXXX.
        /// Returns an empty string for null or empty input.
        /// </summary>
        private static string EscapeString(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return "";
            }

            var sb = new StringBuilder(text.Length);
            foreach (char c in text)
            {
                if (!char.IsControl(c))
                {
                    sb.Append(c);
                    continue;
                }

                switch (c)
                {
                    case '\n': sb.Append("\\n"); break;
                    case '\r': sb.Append("\\r"); break;
                    case '\t': sb.Append("\\t"); break;
                    case '\0': sb.Append("\\0"); break;
                    default: sb.Append($"\\u{(int)c:X4}"); break;
                }
            }

            return sb.ToString();
        }

        /// <summary>
        /// Encodes <paramref name="text"/> as Big5 bytes and decodes those bytes as GBK.
        /// This reinterprets the bytes; it does not map traditional characters to their
        /// simplified forms. Returns <paramref name="text"/> unchanged when either
        /// encoding is unavailable.
        /// </summary>
        private static string ConvertBig5ToGb(string text)
        {
            try
            {
                byte[] big5Bytes = Encoding.GetEncoding("big5").GetBytes(text);
                return Encoding.GetEncoding("GBK").GetString(big5Bytes);
            }
            catch (Exception ex) when (ex is ArgumentException || ex is NotSupportedException)
            {
                return text;
            }
        }
    }
}