# -*- coding: utf-8 -*-
import sys, os
import pyltp
from pyltp import SentenceSplitter,Segmentor, Postagger, Parser, NamedEntityRecognizer, SementicRoleLabeller
# Demo of the pyltp pipeline on a fixed sample text: sentence splitting, word
# segmentation, part-of-speech tagging, dependency parsing and named-entity
# recognition. Each stage loads its model from LTP_DATA_DIR and prints its
# result to stdout.

# Directory that holds the LTP model files; change it here if the models
# live somewhere else.
LTP_DATA_DIR = "ltp_data_v3.4.0"

paragraph = '叙利亚东古塔地区7日发生疑似化学武器袭击事件,导致70余人丧生。报道一出,叙利亚反对派、美国、英国、法国等纷纷指责叙政府军使用化学武器袭击无辜平民。但叙利亚坚决否认,并指责西方和叙反对派造谣,目的是保护被围困的恐怖分子。俄外交部则认为,该谣言旨在袒护恐怖分子,并为外部势力发动打击寻找借口。'

# Sentence splitting: show only the first sentence of the paragraph.
sentence = SentenceSplitter.split(paragraph)[0]
print(sentence)

# Word segmentation.
# NOTE(review): the whole paragraph is segmented here, not just `sentence` --
# confirm that this is intended.
segmentor = Segmentor()
segmentor.load(os.path.join(LTP_DATA_DIR, "cws.model"))
words = segmentor.segment(paragraph)
print(" ".join(words))

# Part-of-speech tagging of the segmented words.
postagger = Postagger()
postagger.load(os.path.join(LTP_DATA_DIR, "pos.model"))
postags = postagger.postag(words)
print(" ".join(postags))

# Dependency parsing; each arc is printed as "<head>:<relation>".
parser = Parser()
parser.load(os.path.join(LTP_DATA_DIR, "parser.model"))
arcs = parser.parse(words, postags)
print(" ".join("%d:%s" % (arc.head, arc.relation) for arc in arcs))

# Named-entity recognition on the segmented paragraph.
recognizer = NamedEntityRecognizer()
recognizer.load(os.path.join(LTP_DATA_DIR, "ner.model"))
netag = recognizer.recognize(words, postags)
for word, ntag in zip(words, netag):
    # Only print tokens whose tag is not 'O' (presumably the "not an entity"
    # tag -- verify against the LTP tag set).
    if ntag != 'O':
        print(word + '/' + ntag)
print(" ".join(netag))

# Named-entity recognition on a hand-segmented, hand-tagged sentence.
word_list = ['欧几里得', '是', '西元前', '三', '世纪', '的', '希腊', '数学家', '。']
postags_list = ['nh', 'v', 'nt', 'm', 'n', 'u', 'ns', 'n', 'wp']
nertags = recognizer.recognize(word_list, postags_list)
for word, ntag in zip(word_list, nertags):
    if ntag != 'O':
        print(word + '/' + ntag)
print(' '.join(nertags))

# Release the loaded models once every stage has run.
segmentor.release()
postagger.release()
parser.release()
recognizer.release()
# 网友评论 ("user comments": web-page footer left over from scraping; not part of the script)