/// Splits `text` into tokens using `CFStringTokenizer` in
/// `kCFStringTokenizerUnitWordBoundary` mode with the current locale.
///
/// - Parameter text: The string to tokenize.
/// - Returns: The tokens in the order the tokenizer reports them, or an empty
///   array when the tokenizer cannot be created or reports no token.
func wordBoundary(_ text: String) -> [String] {
    // CFString ranges are expressed in UTF-16 code units, not Swift `Character`s,
    // so both the tokenizer range and the slicing below go through the UTF-16 view.
    // Using `text.count` / Character offsets truncates and mis-slices any text
    // containing emoji or other multi-unit characters.
    let utf16 = text.utf16
    guard let tokenizer = CFStringTokenizerCreate(
        kCFAllocatorDefault,
        text as CFString,
        CFRangeMake(0, utf16.count),
        kCFStringTokenizerUnitWordBoundary,
        CFLocaleCopyCurrent()
    ) else {
        return []
    }

    var boundaries: [String] = []
    CFStringTokenizerAdvanceToNextToken(tokenizer)
    var range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
    // A zero-length range means the tokenizer has no current token left.
    while range.length > 0 {
        let start = utf16.index(utf16.startIndex, offsetBy: range.location)
        let end = utf16.index(start, offsetBy: range.length)
        boundaries.append(String(text[start..<end]))
        CFStringTokenizerAdvanceToNextToken(tokenizer)
        range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
    }
    return boundaries
}
/// Splits `text` into sentences using `CFStringTokenizer` in
/// `kCFStringTokenizerUnitSentence` mode with the current locale, and tokenizes
/// each sentence with `wordBoundary(_:)`.
///
/// - Parameter text: The string to split.
/// - Returns: One single-entry dictionary per sentence, in the order the
///   tokenizer reports them, mapping the sentence text to the tokens returned
///   by `wordBoundary(_:)` for that sentence. Empty when the tokenizer cannot
///   be created or reports no sentence.
func wordBoundariesWithSentences(_ text: String) -> [Dictionary<String, Array<String>>] {
    // CFString ranges are expressed in UTF-16 code units, not Swift `Character`s,
    // so both the tokenizer range and the slicing below go through the UTF-16 view.
    // Using `text.count` / Character offsets truncates and mis-slices any text
    // containing emoji or other multi-unit characters.
    let utf16 = text.utf16
    guard let tokenizer = CFStringTokenizerCreate(
        kCFAllocatorDefault,
        text as CFString,
        CFRangeMake(0, utf16.count),
        kCFStringTokenizerUnitSentence,
        CFLocaleCopyCurrent()
    ) else {
        return []
    }

    var result: [Dictionary<String, Array<String>>] = []
    CFStringTokenizerAdvanceToNextToken(tokenizer)
    var range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
    // A zero-length range means the tokenizer has no current token left.
    while range.length > 0 {
        let start = utf16.index(utf16.startIndex, offsetBy: range.location)
        let end = utf16.index(start, offsetBy: range.length)
        let sentence = String(text[start..<end])
        result.append([sentence: wordBoundary(sentence)])
        CFStringTokenizerAdvanceToNextToken(tokenizer)
        range = CFStringTokenizerGetCurrentTokenRange(tokenizer)
    }
    return result
}
网友评论