時期實體識別

import re
from datetime import datetime,timedelta
from dateutil.parser import parse
import jieba.posseg as psg
# Lookup tables used by cn2dig() and year2dig().
# NOTE(review): the literal values were missing from this copy of the file
# (both assignments had an empty right-hand side). They are reconstructed from
# the numerals accepted by the regex in parse_datetime() and from how cn2dig()
# combines digits with units -- confirm against the original source.
# The lower-case names are kept because the functions in this file refer to
# them by exactly these names.

# Single numeral character (Chinese or ASCII) -> integer value.
util_cn_num = {
    '零': 0, '一': 1, '二': 2, '兩': 2, '三': 3, '四': 4,
    '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
    '0': 0, '1': 1, '2': 2, '3': 3, '4': 4,
    '5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
}

# Chinese magnitude character -> multiplier.
util_cn_unit = {'十': 10, '百': 100, '千': 1000, '萬': 10000}
def cn2dig(src):
    """Convert a numeral string (ASCII digits or Chinese numerals) to an int.

    If *src* starts with ASCII digits, that leading run is returned as an int
    and the rest of the string is ignored. Otherwise the string is read from
    right to left: a character found in ``util_cn_unit`` sets the current
    multiplier, and a character found in ``util_cn_num`` adds
    ``digit * multiplier`` to the total.

    Args:
        src: The numeral text, e.g. ``"28"`` or ``"二十五"``.

    Returns:
        The integer value, or ``None`` when *src* is empty/``None`` or
        contains a character that is neither a known digit nor a known unit.
    """
    if not src:
        return None

    m = re.match(r"\d+", src)
    if m:
        return int(m.group(0))

    rsl = 0
    unit = 1
    for item in reversed(src):
        if item in util_cn_unit:
            unit = util_cn_unit[item]
        elif item in util_cn_num:
            rsl += util_cn_num[item] * unit
        else:
            return None

    # A leading unit with no digit in front of it (e.g. "十", "十五") stands
    # for one of that unit, which the loop above never added.
    if rsl < unit:
        rsl += unit
    return rsl
def year2dig(year):
    """Convert a year written digit-by-digit into an int.

    Every character found in ``util_cn_num`` is replaced by its digit; other
    characters are kept as they are. The leading run of digits in the result
    is then interpreted as the year. A two-digit year is placed in the
    current century.

    Args:
        year: Year text without the trailing unit character, e.g. ``"2017"``
            or ``"二零一七"``.

    Returns:
        The year as an int, or ``None`` when the converted text does not
        start with a digit.
    """
    res = ''.join(
        str(util_cn_num[item]) if item in util_cn_num else item
        for item in year
    )
    m = re.match(r"\d+", res)
    if m is None:
        return None

    digits = m.group(0)
    if len(digits) == 2:
        # `datetime` here is the class imported via
        # `from datetime import datetime`; integer division keeps the
        # century arithmetic exact.
        return datetime.today().year // 100 * 100 + int(digits)
    return int(digits)
# Fallback pattern for Chinese date/time expressions. Groups, in order:
# 1 year, 2 month, 3 day, 4 period-of-day words, 5 hour, 6 minute, 7 second.
# Every group is optional, so the pattern always matches (possibly empty).
# NOTE(review): this copy of the file had "**" where the numerals "八九"
# belong in each character class; they are restored here.
_CN_DATETIME_RE = re.compile(
    r"([0-9零一二兩三四五六七八九十]+年)?"
    r"([0-9一二兩三四五六七八九十]+月)?"
    r"([0-9一二兩三四五六七八九十]+[號日])?"
    r"([上中下午晚早]+)?"
    r"([0-9零一二兩三四五六七八九十百]+[點:\.時])?"
    r"([0-9零一二三四五六七八九十百]+分?)?"
    r"([0-9零一二三四五六七八九十百]+秒)?"
)

# Trailing unit characters the pattern above can leave on a captured field.
_UNIT_SUFFIXES = "年月號日點:.時分秒"


def parse_datetime(msg):
    """Normalise a date/time expression to ``'YYYY-MM-DD HH:MM:SS'``.

    The text is first handed to ``dateutil``'s ``parse`` in fuzzy mode. If
    that raises, the text is matched against a pattern for Chinese date/time
    expressions and the captured fields overwrite the corresponding fields of
    the current date and time.

    Args:
        msg: The expression to parse, e.g. ``"2017年11月28日下午三點"``.

    Returns:
        The formatted timestamp, or ``None`` when *msg* is empty/``None``,
        nothing in it matches the pattern, or the captured values do not
        form a valid date/time.
    """
    if msg is None or len(msg) == 0:
        return None

    try:
        return parse(msg, fuzzy=True).strftime('%Y-%m-%d %H:%M:%S')
    except Exception:
        # Deliberately broad: any failure of the generic parser simply means
        # the Chinese-pattern fallback below gets a chance.
        pass

    m = _CN_DATETIME_RE.match(msg)
    if not m.group(0):
        # Nothing date-like at the start of the text. Without this check the
        # zero defaults below would turn arbitrary text into today's midnight.
        return None

    # Keys are the keyword names accepted by datetime.replace().
    # NOTE(review): this mapping was missing from this copy of the file and is
    # rebuilt from how the loop below consumes it. Absent time-of-day fields
    # default to zero so a date-only expression resolves to midnight --
    # confirm that this matches the intended behaviour.
    res = {
        'year': m.group(1),
        'month': m.group(2),
        'day': m.group(3),
        'hour': m.group(5) or '00',
        'minute': m.group(6) or '00',
        'second': m.group(7) or '00',
    }

    params = {}
    for name, raw in res.items():
        if not raw:
            continue
        # Drop the unit character only when one is present; the minute field
        # may be captured without its "分", and the defaults carry no unit.
        if raw[-1] in _UNIT_SUFFIXES:
            raw = raw[:-1]
        tmp = year2dig(raw) if name == 'year' else cn2dig(raw)
        if tmp is not None:
            params[name] = int(tmp)

    try:
        target_date = datetime.today().replace(**params)
    except ValueError:
        # Out-of-range field such as month 13 or hour 30.
        return None

    # Afternoon/evening/noon markers move a 12-hour clock value past midday.
    if m.group(4) in ('下午', '晚上', '中午') and target_date.hour < 12:
        target_date = target_date.replace(hour=target_date.hour + 12)
    return target_date.strftime('%Y-%m-%d %H:%M:%S')
def check_time_valid(word):
    """Filter and clean a candidate time expression.

    A candidate made up only of digits and at most six characters long is
    rejected. Otherwise, digits trailing a day marker at the end of the
    candidate are dropped and the marker is normalised to "日"
    (e.g. ``"30號12"`` becomes ``"30日"``).

    Args:
        word: The candidate expression.

    Returns:
        The cleaned expression, or ``None`` when the candidate is rejected.
    """
    if re.match(r"\d+$", word) and len(word) <= 6:
        return None
    # After one substitution the text ends in "日", so a second pass could
    # never match again; a single re.sub is sufficient.
    return re.sub(r"[號日]\d+$", "日", word)
# Time extraction
def time_extract(text):
    """Extract date/time expressions from *text* and normalise them.

    The text is segmented with ``jieba.posseg``. Consecutive tokens tagged
    ``'m'`` or ``'t'`` (jieba's tags for numerals and time words) are joined
    into one candidate. A relative-day keyword starts a new candidate holding
    the corresponding absolute date. Each candidate is filtered through
    ``check_time_valid`` and converted by ``parse_datetime``.

    Args:
        text: The sentence to scan.

    Returns:
        A list of ``'YYYY-MM-DD HH:MM:SS'`` strings, in order of appearance;
        candidates that cannot be parsed are dropped.
    """
    time_res = []
    word = ''
    # Relative-day keyword -> offset in days from today.
    # NOTE(review): the literal was missing from this copy of the file; the
    # entries are reconstructed from the sample sentences and the
    # timedelta(days=...) usage below -- confirm against the original source.
    keydate = {'今天': 0, '明天': 1, '後天': 2}
    for k, v in psg.cut(text):
        if k in keydate:
            if word != '':
                time_res.append(word)
            # Only the date is embedded, so that a following time-of-day
            # expression can be appended and parsed together with it. The
            # unit characters are inserted via str.format after strftime,
            # so strftime itself only sees an ASCII pattern.
            day = datetime.today() + timedelta(days=keydate[k])
            word = day.strftime('%Y{y}%m{m}%d{d}').format(y='年', m='月', d='日')
        elif word != '':
            if v in ('m', 't'):
                word = word + k
            else:
                # A token of any other kind ends the current candidate.
                time_res.append(word)
                word = ''
        elif v in ('m', 't'):
            word = k
    if word != '':
        time_res.append(word)

    result = [w for w in map(check_time_valid, time_res) if w is not None]
    final_res = [parse_datetime(w) for w in result]
    return [x for x in final_res if x is not None]
# Demo: run the extractor over a few sample booking sentences and print each
# sentence together with the timestamps found in it.
text1, text2, text3, text4, text5 = (
    '我要住到明天下午三點',
    '預定28號的房間',
    '我要從26號下午4點住到11月2號',
    '我要預訂今天到30的房間',
    '今天30號呵呵',
)
for sample in (text1, text2, text3, text4, text5):
    print(sample, time_extract(sample), sep=':')

ai命名實體識別模型命名實體識別

CRF 中有兩類特徵函式，分別是狀態特徵和轉移特徵：狀態特徵用當前節點（某個輸出位置可能的狀態中的某個狀態稱為一個節點）的狀態分數表示，轉移特徵用上一個節點到當前節點的轉移分數表示。其損失函式定義如下：CRF 損失函式的計算，需要用到真實路徑分數（包括狀態分數和轉移分數）、其他所有可能的路徑的分數（包括狀...

命名實體識別

簡單的分詞器（如二元分詞器）無法識別 OOV，所以需要運用一些規定的規則來輔助識別。如在識別音譯人名時，可以設定規則：一旦發現某詞是人名，而該詞後面跟隨人名詞時，將它們合併。針對不同情況，需要設計相應的標註集。拿人名識別舉例：輸入資料集進行訓練後，會將人名拆分為碎片，模擬人名的錯誤切分。接著，檢查拆...

實體識別類別標註

當我們要對字串中的實體進行標註時，需要尋找到實體在字串的開始位置如下 def index q list in k list q list,k list known q list in k list,find index first time of q list in k list q list l...

時期實體識別

ai命名實體識別模型 命名實體識別

命名實體識別

實體識別類別標註

相關推薦

ai命名實體識別模型命名實體識別