美文网首页
iOS 获取文件字符编码

iOS 获取文件字符编码

作者: 夬大爷 | 来源:发表于2016-12-08 16:02 被阅读0次
          // Holds the decoded file contents once some encoding succeeds.
          NSString *fileString = nil;

          // Stage 1: brute-force the CoreFoundation "extended" encodings
          // (CFStringEncodingExt.h). Each candidate is tried in order and the first
          // one that decodes the file wins.
          //
          // NOTE: a successful decode only proves the bytes are *valid* in that
          // encoding, not that it is the encoding the file was written in; many
          // single-byte code pages accept almost any byte sequence. The logged
          // output should be checked by eye.
          //
          // The guard is trivially true here (fileString was just set to nil;
          // messaging nil returns 0).
          if (fileString.length == 0) {
              NSArray *codeArray = @[
                  /* kCFStringEncodingMacRoman = 0L, defined in CoreFoundation/CFString.h (omitted here) */
                  @(kCFStringEncodingMacJapanese),
                  @(kCFStringEncodingMacChineseTrad),
                  @(kCFStringEncodingMacKorean),
                  @(kCFStringEncodingMacArabic),
                  @(kCFStringEncodingMacHebrew),
                  @(kCFStringEncodingMacGreek),
                  @(kCFStringEncodingMacCyrillic),
                  @(kCFStringEncodingMacDevanagari),
                  @(kCFStringEncodingMacGurmukhi),
                  @(kCFStringEncodingMacGujarati),
                  @(kCFStringEncodingMacOriya),
                  @(kCFStringEncodingMacBengali),
                  @(kCFStringEncodingMacTamil),
                  @(kCFStringEncodingMacTelugu),
                  @(kCFStringEncodingMacKannada),
                  @(kCFStringEncodingMacMalayalam),
                  @(kCFStringEncodingMacSinhalese),
                  @(kCFStringEncodingMacBurmese),
                  @(kCFStringEncodingMacKhmer),
                  @(kCFStringEncodingMacThai),
                  @(kCFStringEncodingMacLaotian),
                  @(kCFStringEncodingMacGeorgian),
                  @(kCFStringEncodingMacArmenian),
                  @(kCFStringEncodingMacChineseSimp),
                  @(kCFStringEncodingMacTibetan),
                  @(kCFStringEncodingMacMongolian),
                  @(kCFStringEncodingMacEthiopic),
                  @(kCFStringEncodingMacCentralEurRoman),
                  @(kCFStringEncodingMacVietnamese),
                  @(kCFStringEncodingMacExtArabic),
                  /* The following use script code 0, smRoman */
                  @(kCFStringEncodingMacSymbol),
                  @(kCFStringEncodingMacDingbats),
                  @(kCFStringEncodingMacTurkish),
                  @(kCFStringEncodingMacCroatian),
                  @(kCFStringEncodingMacIcelandic),
                  @(kCFStringEncodingMacRomanian),
                  @(kCFStringEncodingMacCeltic),
                  @(kCFStringEncodingMacGaelic),
                  /* The following use script code 4, smArabic */
                  @(kCFStringEncodingMacFarsi), /* Like MacArabic but uses Farsi digits */
                  /* The following use script code 7, smCyrillic */
                  @(kCFStringEncodingMacUkrainian),
                  /* The following use script code 32, smUnimplemented */
                  @(kCFStringEncodingMacInuit),
                  @(kCFStringEncodingMacVT100), /* VT100/102 font from Comm Toolbox: Latin-1 repertoire + box drawing etc */
                  /* Special Mac OS encodings */
                  @(kCFStringEncodingMacHFS), /* Meta-value, should never appear in a table */

                  /* Unicode & ISO UCS encodings begin at 0x100 */
                  /* We don't use Unicode variations defined in TextEncoding; use the ones in CFString.h, instead. */

                  /* ISO 8-bit and 7-bit encodings begin at 0x200 */
                  /* kCFStringEncodingISOLatin1 = 0x0201, defined in CoreFoundation/CFString.h (omitted here) */
                  @(kCFStringEncodingISOLatin2), /* ISO 8859-2 */
                  @(kCFStringEncodingISOLatin3), /* ISO 8859-3 */
                  @(kCFStringEncodingISOLatin4), /* ISO 8859-4 */
                  @(kCFStringEncodingISOLatinCyrillic), /* ISO 8859-5 */
                  @(kCFStringEncodingISOLatinArabic), /* ISO 8859-6, =ASMO 708, =DOS CP 708 */
                  @(kCFStringEncodingISOLatinGreek), /* ISO 8859-7 */
                  @(kCFStringEncodingISOLatinHebrew), /* ISO 8859-8 */
                  @(kCFStringEncodingISOLatin5), /* ISO 8859-9 */
                  @(kCFStringEncodingISOLatin6), /* ISO 8859-10 */
                  @(kCFStringEncodingISOLatinThai), /* ISO 8859-11 */
                  @(kCFStringEncodingISOLatin7), /* ISO 8859-13 */
                  @(kCFStringEncodingISOLatin8), /* ISO 8859-14 */
                  @(kCFStringEncodingISOLatin9), /* ISO 8859-15 */
                  @(kCFStringEncodingISOLatin10), /* ISO 8859-16 */

                  /* MS-DOS & Windows encodings begin at 0x400 */
                  @(kCFStringEncodingDOSLatinUS), /* code page 437 */
                  @(kCFStringEncodingDOSGreek), /* code page 737 (formerly code page 437G) */
                  @(kCFStringEncodingDOSBalticRim), /* code page 775 */
                  @(kCFStringEncodingDOSLatin1), /* code page 850, "Multilingual" */
                  @(kCFStringEncodingDOSGreek1), /* code page 851 */
                  @(kCFStringEncodingDOSLatin2), /* code page 852, Slavic */
                  @(kCFStringEncodingDOSCyrillic), /* code page 855, IBM Cyrillic */
                  @(kCFStringEncodingDOSTurkish), /* code page 857, IBM Turkish */
                  @(kCFStringEncodingDOSPortuguese), /* code page 860 */
                  @(kCFStringEncodingDOSIcelandic), /* code page 861 */
                  @(kCFStringEncodingDOSHebrew), /* code page 862 */
                  @(kCFStringEncodingDOSCanadianFrench), /* code page 863 */
                  @(kCFStringEncodingDOSArabic), /* code page 864 */
                  @(kCFStringEncodingDOSNordic), /* code page 865 */
                  @(kCFStringEncodingDOSRussian), /* code page 866 */
                  @(kCFStringEncodingDOSGreek2), /* code page 869, IBM Modern Greek */
                  @(kCFStringEncodingDOSThai), /* code page 874, also for Windows */
                  @(kCFStringEncodingDOSJapanese), /* code page 932, also for Windows */
                  @(kCFStringEncodingDOSChineseSimplif), /* code page 936, also for Windows */
                  @(kCFStringEncodingDOSKorean), /* code page 949, also for Windows; Unified Hangul Code */
                  @(kCFStringEncodingDOSChineseTrad), /* code page 950, also for Windows */
                  /* kCFStringEncodingWindowsLatin1 = 0x0500, defined in CoreFoundation/CFString.h (omitted here) */
                  @(kCFStringEncodingWindowsLatin2), /* code page 1250, Central Europe */
                  @(kCFStringEncodingWindowsCyrillic), /* code page 1251, Slavic Cyrillic */
                  @(kCFStringEncodingWindowsGreek), /* code page 1253 */
                  @(kCFStringEncodingWindowsLatin5), /* code page 1254, Turkish */
                  @(kCFStringEncodingWindowsHebrew), /* code page 1255 */
                  @(kCFStringEncodingWindowsArabic), /* code page 1256 */
                  @(kCFStringEncodingWindowsBalticRim), /* code page 1257 */
                  @(kCFStringEncodingWindowsVietnamese), /* code page 1258 */
                  @(kCFStringEncodingWindowsKoreanJohab), /* code page 1361, for Windows NT */

                  /* Various national standards begin at 0x600 */
                  /* kCFStringEncodingASCII = 0x0600, defined in CoreFoundation/CFString.h (omitted here) */
                  @(kCFStringEncodingANSEL), /* ANSEL (ANSI Z39.47) */
                  @(kCFStringEncodingJIS_X0201_76),
                  @(kCFStringEncodingJIS_X0208_83),
                  @(kCFStringEncodingJIS_X0208_90),
                  @(kCFStringEncodingJIS_X0212_90),
                  @(kCFStringEncodingJIS_C6226_78),
                  @(kCFStringEncodingShiftJIS_X0213), /* Shift-JIS format encoding of JIS X0213 planes 1 and 2 */
                  @(kCFStringEncodingShiftJIS_X0213_MenKuTen), /* JIS X0213 in plane-row-column notation */
                  @(kCFStringEncodingGB_2312_80),
                  @(kCFStringEncodingGBK_95), /* annex to GB 13000-93; for Windows 95 */
                  @(kCFStringEncodingGB_18030_2000),
                  @(kCFStringEncodingKSC_5601_87), /* same as KSC 5601-92 without Johab annex */
                  @(kCFStringEncodingKSC_5601_92_Johab), /* KSC 5601-92 Johab annex */
                  @(kCFStringEncodingCNS_11643_92_P1), /* CNS 11643-1992 plane 1 */
                  @(kCFStringEncodingCNS_11643_92_P2), /* CNS 11643-1992 plane 2 */
                  @(kCFStringEncodingCNS_11643_92_P3), /* CNS 11643-1992 plane 3 (was plane 14 in 1986 version) */

                  /* ISO 2022 collections begin at 0x800 */
                  @(kCFStringEncodingISO_2022_JP),
                  @(kCFStringEncodingISO_2022_JP_2),
                  @(kCFStringEncodingISO_2022_JP_1), /* RFC 2237 */
                  @(kCFStringEncodingISO_2022_JP_3), /* JIS X0213 */
                  @(kCFStringEncodingISO_2022_CN),
                  @(kCFStringEncodingISO_2022_CN_EXT),
                  @(kCFStringEncodingISO_2022_KR),

                  /* EUC collections begin at 0x900 */
                  @(kCFStringEncodingEUC_JP), /* ISO 646, 1-byte katakana, JIS 208, JIS 212 */
                  @(kCFStringEncodingEUC_CN), /* ISO 646, GB 2312-80 */
                  @(kCFStringEncodingEUC_TW), /* ISO 646, CNS 11643-1992 Planes 1-16 */
                  @(kCFStringEncodingEUC_KR), /* ISO 646, KS C 5601-1987 */

                  /* Misc standards begin at 0xA00 */
                  @(kCFStringEncodingShiftJIS), /* plain Shift-JIS */
                  @(kCFStringEncodingKOI8_R), /* Russian internet standard */
                  @(kCFStringEncodingBig5), /* Big-5 (has variants) */
                  @(kCFStringEncodingMacRomanLatin1), /* Mac OS Roman permuted to align with ISO Latin-1 */
                  @(kCFStringEncodingHZ_GB_2312), /* HZ (RFC 1842, for Chinese mail & news) */
                  @(kCFStringEncodingBig5_HKSCS_1999), /* Big-5 with Hong Kong special char set supplement */
                  @(kCFStringEncodingVISCII), /* RFC 1456, Vietnamese */
                  @(kCFStringEncodingKOI8_U), /* RFC 2319, Ukrainian */
                  @(kCFStringEncodingBig5_E), /* Taiwan Big-5E standard */

                  /* Other platform encodings */
                  /* kCFStringEncodingNextStepLatin = 0x0B01, defined in CoreFoundation/CFString.h (omitted here) */
                  @(kCFStringEncodingNextStepJapanese), /* NextStep Japanese encoding */

                  /* EBCDIC & IBM host encodings begin at 0xC00 */
                  @(kCFStringEncodingEBCDIC_US), /* basic EBCDIC-US */
                  @(kCFStringEncodingEBCDIC_CP037), /* code page 037, extended EBCDIC (Latin-1 set) for US,Canada... */

                  @(kCFStringEncodingUTF7), /* kTextEncodingUnicodeDefault + kUnicodeUTF7Format RFC2152 */
                  @(kCFStringEncodingUTF7_IMAP), /* UTF-7 (IMAP folder variant) RFC3501 */

                  /* Deprecated constants */
                  @(kCFStringEncodingShiftJIS_X0213_00) /* Shift-JIS format encoding of JIS X0213 planes 1 and 2 (DEPRECATED) */
              ];

              for (NSNumber *number in codeArray) {
                  CFStringEncoding cfEncoding = (CFStringEncoding)[number unsignedIntValue];
                  // NSString's file APIs take NSStringEncoding, so map the CF constant first.
                  NSStringEncoding enc = CFStringConvertEncodingToNSStringEncoding(cfEncoding);

                  NSError *error = nil;
                  NSString *decoded = [NSString stringWithContentsOfFile:path encoding:enc error:&error];
                  if (decoded == nil) {
                      // Judge failure by the return value; the error is only meaningful then.
                      NSLog(@"Error:%@", [error localizedDescription]);
                      continue;
                  }

                  // Keep the first successful decode. Without stopping here, a later
                  // failing attempt would overwrite the result with nil.
                  fileString = decoded;
                  NSLog(@"%@", fileString);
                  // Report the encoding that worked (both the CF and the NS value).
                  NSLog(@"NSStringEncoding=%lu (CFStringEncoding=0x%04X)",
                        (unsigned long)enc, (unsigned int)cfEncoding);
                  break;
              }
          }
        
        
        
        // Fallback stage: runs only when no earlier attempt produced a non-empty
        // string. Tries the built-in Foundation NSStringEncoding constants in
        // order and keeps the first one that decodes the file.
        //
        // NOTE: a successful decode only proves the bytes are valid in that
        // encoding (e.g. ISO Latin 1 accepts any byte sequence), so the logged
        // text should still be checked by eye.
        if (fileString.length == 0) {

            NSArray *arrEncoding = @[@(NSASCIIStringEncoding),
                                     @(NSNEXTSTEPStringEncoding),
                                     @(NSJapaneseEUCStringEncoding),
                                     @(NSUTF8StringEncoding),
                                     @(NSISOLatin1StringEncoding),
                                     @(NSSymbolStringEncoding),
                                     @(NSNonLossyASCIIStringEncoding),
                                     @(NSShiftJISStringEncoding),
                                     @(NSISOLatin2StringEncoding),
                                     @(NSUnicodeStringEncoding),
                                     @(NSWindowsCP1251StringEncoding),
                                     @(NSWindowsCP1252StringEncoding),
                                     @(NSWindowsCP1253StringEncoding),
                                     @(NSWindowsCP1254StringEncoding),
                                     @(NSWindowsCP1250StringEncoding),
                                     @(NSISO2022JPStringEncoding),
                                     @(NSMacOSRomanStringEncoding),
                                     @(NSUTF16StringEncoding),
                                     @(NSUTF16BigEndianStringEncoding),
                                     @(NSUTF16LittleEndianStringEncoding),
                                     @(NSUTF32StringEncoding),
                                     @(NSUTF32BigEndianStringEncoding),
                                     @(NSUTF32LittleEndianStringEncoding)
                                     ];

            // Human-readable names for logging. Must stay index-aligned with
            // arrEncoding above: entry i names the encoding at arrEncoding[i].
            NSArray *arrEncodingName = @[@"NSASCIIStringEncoding",
                                         @"NSNEXTSTEPStringEncoding",
                                         @"NSJapaneseEUCStringEncoding",
                                         @"NSUTF8StringEncoding",
                                         @"NSISOLatin1StringEncoding",
                                         @"NSSymbolStringEncoding",
                                         @"NSNonLossyASCIIStringEncoding",
                                         @"NSShiftJISStringEncoding",
                                         @"NSISOLatin2StringEncoding",
                                         @"NSUnicodeStringEncoding",
                                         @"NSWindowsCP1251StringEncoding",
                                         @"NSWindowsCP1252StringEncoding",
                                         @"NSWindowsCP1253StringEncoding",
                                         @"NSWindowsCP1254StringEncoding",
                                         @"NSWindowsCP1250StringEncoding",
                                         @"NSISO2022JPStringEncoding",
                                         @"NSMacOSRomanStringEncoding",
                                         @"NSUTF16StringEncoding",
                                         @"NSUTF16BigEndianStringEncoding",
                                         @"NSUTF16LittleEndianStringEncoding",
                                         @"NSUTF32StringEncoding",
                                         @"NSUTF32BigEndianStringEncoding",
                                         @"NSUTF32LittleEndianStringEncoding"
                                         ];

            // NSUInteger index: -count is unsigned, so avoid a signed/unsigned compare.
            for (NSUInteger i = 0; i < [arrEncoding count]; i++) {
                NSStringEncoding encodingCode = [arrEncoding[i] unsignedIntegerValue];
                NSLog(@"(%@)", arrEncodingName[i]);

                NSError *error = nil;
                NSString *decoded = [NSString stringWithContentsOfFile:path encoding:encodingCode error:&error];
                if (decoded == nil) {
                    // Judge failure by the return value; the error is only meaningful then.
                    NSLog(@"Error:%@", [error localizedDescription]);
                    continue;
                }

                // Keep the first successful decode. Without stopping here, a later
                // failing attempt would overwrite the result with nil.
                fileString = decoded;
                NSLog(@"%@", fileString);
                break;
            }
        }
    

    相关文章

      网友评论

          本文标题:iOS 获取文件字符编码

          本文链接:https://www.haomeiwen.com/subject/kojsmttx.html