美文网首页
iOS 分词

iOS 分词

作者: jianshufei | 来源:发表于2022-07-05 11:33 被阅读0次
    /// Splits `text` into tokens using `CFStringTokenizer` with the
    /// `kCFStringTokenizerUnitWordBoundary` unit and the current locale.
    ///
    /// - Parameter text: The string to tokenize.
    /// - Returns: The substrings of `text` covered by each token the tokenizer
    ///   reports, in order. Empty when the tokenizer yields no token.
    func wordBoundary(_ text: String) -> [String] {
        // CFString ranges are expressed in UTF-16 code units, not Swift
        // `Character`s, so the length handed to the tokenizer must be the
        // UTF-16 count. Using `text.count` truncates the scanned range for
        // any text containing emoji or other multi-unit characters.
        let utf16Length = text.utf16.count
        let tokenizer = CFStringTokenizerCreate(
            kCFAllocatorDefault,
            text as CFString?,
            CFRangeMake(0, utf16Length),
            kCFStringTokenizerUnitWordBoundary,
            CFLocaleCopyCurrent()
        )

        var boundaries: [String] = []
        CFStringTokenizerAdvanceToNextToken(tokenizer)
        var range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
        // A zero-length range is the tokenizer's "no current token" answer.
        while range.length > 0 {
            // Token ranges come back as UTF-16 offsets; convert them through
            // the UTF-16 view instead of treating them as Character offsets.
            let lower = String.Index(utf16Offset: range.location, in: text)
            let upper = String.Index(utf16Offset: range.location + range.length, in: text)
            boundaries.append(String(text[lower..<upper]))

            CFStringTokenizerAdvanceToNextToken(tokenizer)
            range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
        }
        return boundaries
    }
        
    /// Splits `text` into sentences using `CFStringTokenizer` with the
    /// `kCFStringTokenizerUnitSentence` unit and the current locale, then
    /// tokenizes each sentence with `wordBoundary(_:)`.
    ///
    /// - Parameter text: The string to split.
    /// - Returns: One single-entry dictionary per sentence, in order, mapping
    ///   the sentence text to the tokens `wordBoundary(_:)` returns for it.
    func wordBoundariesWithSentences(_ text: String) -> [Dictionary<String, Array<String>>] {
        // CFString ranges are expressed in UTF-16 code units, not Swift
        // `Character`s, so the scanned length must be the UTF-16 count.
        let utf16Length = text.utf16.count
        let tokenizer = CFStringTokenizerCreate(
            kCFAllocatorDefault,
            text as CFString?,
            CFRangeMake(0, utf16Length),
            kCFStringTokenizerUnitSentence,
            CFLocaleCopyCurrent()
        )

        var result: [[String: [String]]] = []
        CFStringTokenizerAdvanceToNextToken(tokenizer)
        var range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
        // A zero-length range is the tokenizer's "no current token" answer.
        while range.length > 0 {
            // Sentence ranges are UTF-16 offsets; map them via the UTF-16 view
            // so emoji and other multi-unit characters do not shift the slice.
            let lower = String.Index(utf16Offset: range.location, in: text)
            let upper = String.Index(utf16Offset: range.location + range.length, in: text)
            let sentence = String(text[lower..<upper])
            result.append([sentence: wordBoundary(sentence)])

            CFStringTokenizerAdvanceToNextToken(tokenizer)
            range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
        }
        return result
    }
    

    相关文章

      网友评论

          本文标题:iOS 分词

          本文链接:https://www.haomeiwen.com/subject/utlsbrtx.html