美文网首页
过滤html里的标签获取纯文本

过滤html里的标签获取纯文本

作者: shushuzhen | 来源:发表于2018-10-26 15:20 被阅读19次

    首先定义宏

    // Regular-expression patterns for stripping markup from HTML text.
    // Matching is case-sensitive unless the caller compiles the pattern with
    // NSRegularExpressionCaseInsensitive.

    // A whole <script ...> ... </script> block, including its body.
    // \b keeps longer tag names such as <scripts> from matching; \s* accepts
    // end tags written as "</script >".
    #define REGEX_SCRIPT  @"<script\\b[^>]*>[\\s\\S]*?<\\/script\\s*>"
    // A whole <style ...> ... </style> block, including its body.
    #define REGEX_STYLE   @"<style\\b[^>]*>[\\s\\S]*?<\\/style\\s*>"
    // Any single tag: '<', at least one non-'>' character, then '>'.
    #define REGEX_HTML    @"<[^>]+>"
    // One or more whitespace characters. \s already covers tab, CR and LF, and
    // '+' (instead of '*') avoids an empty match at every position.
    #define REGEX_SPACE   @"\\s+"
    // Opening or closing anchor tag (<a ...>, </a>, <A ...>, </A>).
    // The class is [aA] (a '|' inside brackets is a literal bar, not "or") and
    // \b stops tags that merely start with "a" (<abbr>, <article>) from matching.
    #define REGEX_DEL_P   @"</?[aA]\\b[^>]*>"
    
    

    之后用上面定义的正则表达式按顺序逐层过滤:

    /**
     * Returns the text of an HTML string with markup and whitespace removed.
     *
     * Filters run in this order: <script> blocks, <style> blocks, every
     * remaining tag, then all whitespace (including spaces between words).
     * Tag names are matched case-insensitively. HTML entities such as &amp;
     * are not decoded.
     *
     * @param htmlString The HTML text to filter.
     * @return The filtered text. Nil or empty input is returned unchanged.
     */
    - (NSString *)regexString:(NSString *)htmlString{
        // Nothing to strip.
        if (htmlString.length == 0) {
            return htmlString;
        }

        // The patterns are compile-time constants, so compile them only once.
        static NSArray<NSRegularExpression *> *filters;
        static dispatch_once_t onceToken;
        dispatch_once(&onceToken, ^{
            // Order matters: script/style bodies must be removed while their
            // tags are still present, before the generic tag filter runs.
            // REGEX_DEL_P is intentionally not applied: anything it could
            // match is a complete <...> tag, and REGEX_HTML removes all of those.
            NSArray<NSString *> *patterns = @[REGEX_SCRIPT, REGEX_STYLE, REGEX_HTML, REGEX_SPACE];
            NSMutableArray<NSRegularExpression *> *compiled = [NSMutableArray arrayWithCapacity:patterns.count];
            for (NSString *pattern in patterns) {
                NSError *error = nil;
                // Case-insensitive so <SCRIPT> / <Style> blocks are removed
                // together with their contents.
                NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:pattern
                                                                                       options:NSRegularExpressionCaseInsensitive
                                                                                         error:&error];
                NSCAssert(regex != nil, @"Invalid pattern %@: %@", pattern, error);
                if (regex) {
                    [compiled addObject:regex];
                }
            }
            filters = [compiled copy];
        });

        NSString *text = htmlString;
        for (NSRegularExpression *filter in filters) {
            // The range is recomputed on every pass because the previous
            // filter shortened the string.
            text = [filter stringByReplacingMatchesInString:text
                                                    options:0
                                                      range:NSMakeRange(0, text.length)
                                               withTemplate:@""];
        }
        return text;
    }
    

    相关文章

      网友评论

          本文标题:过滤html里的标签获取纯文本

          本文链接:https://www.haomeiwen.com/subject/ydaktqtx.html