A test written with the TextRank API


import pickle
import os

import jieba
import jieba.analyse
from bosonnlp import BosonNLP
from snownlp import SnowNLP
from textrank4zh import TextRank4Keyword, TextRank4Sentence

def main():
    # textrank2()
    textranktest1()

# def textrank2():
#     """Load the 500k-record data and run sentiment analysis, keyword extraction and summarization."""
#     path = "d:\\150w"
#     file = open(path, 'rb')
#     a = pickle.load(file)
#     # print(a)
#     # tr4s = TextRank4Sentence()
#     ll = []
#     i = 0
#     for item in a:
#         print(item[0])
#         print("=============== Title ===============")
#         print(item[1])
#         rowitem = item[2]
#         s = SnowNLP(rowitem)
#         print("=============== Original text ===============")
#         print(rowitem)
#         # tr4s.analyze(text=rowitem, lower=True, source='all_filters')
#         # print(rowitem)
#         # print("\033[1;31m%s\033[43m" % rowitem)
#         nlp = BosonNLP('lsfw0zxs.17321.5fbmjszhbwev')
#         print("=============== Sentiment analysis ===============")
#         print(nlp.sentiment(rowitem))  # a list of [non-negative, negative] probabilities
#         print('=============== Summary ===============')
#         tags_output = jieba.analyse.extract_tags(rowitem, topK=20, withWeight=True)
#         print(tags_output)
#         print(s.summary(5))
#         i = i + 1
#         if i > 100:
#             break
#         # print(i)
#     current_dir = os.path.abspath('.')
#     file_name2 = os.path.join(current_dir, 'abstract.csv')
#     f2 = open(file_name2, 'w+', encoding='utf8')
#     for item in ll:
#         f2.write("\n")
#         f2.write("===== Test (original text) =====")
#         f2.write("\n")
#         f2.write(str(item[0]))
#         f2.write("\n")
#         f2.write("===== Summary =====")
#         f2.write("\n")
#         f2.write(str(item[1]))
#         f2.write("\n")
#     f2.close()
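For reference, below is a minimal, self-contained sketch of the SnowNLP and jieba calls that the commented-out variant above relies on, run on a short made-up sample string instead of the pickled data. SnowNLP's built-in sentiment score is used here as a stand-in for the BosonNLP call, which needs a valid API token; the sample text and topK value are illustrative only.

    from snownlp import SnowNLP
    import jieba.analyse

    sample = "自然語言處理是電腦科學領域的重要方向。它研究如何讓電腦理解並生成人類語言。"  # made-up sample text
    s = SnowNLP(sample)
    print(s.sentiments)                                                 # probability that the text is positive (0..1)
    print(s.summary(1))                                                 # top-1 summary sentence
    print(jieba.analyse.extract_tags(sample, topK=5, withWeight=True))  # list of (keyword, weight) tuples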

def textranktest1():
    """Load the 500k-record data and extract key sentences with TextRank."""
    path = "d:\\100w"
    file = open(path, 'rb')
    a = pickle.load(file)
    # print(a)
    tr4s = TextRank4Sentence()
    ll = []
    i = 0
    for item in a:
        print(item[0])
        rowitem = item[2]
        tr4s.analyze(text=rowitem, lower=True, source='all_filters')
        print("=============== Original text ===============")
        # print(rowitem)
        # print("\033[1;31m%s\033[43m" % rowitem)
        print('=============== Summary ===============')
        summary = []
        for sent in tr4s.get_key_sentences(num=3):
            print(sent.index, sent.weight, sent.sentence)  # index: position of the sentence in the text, weight: its TextRank score
            summary.append(sent.sentence)
        ll.append((rowitem, summary))  # collect (original text, summary) pairs for the CSV dump below
        i = i + 1
        if i > 100:
            break
        print(i)
    current_dir = os.path.abspath('.')
    file_name2 = os.path.join(current_dir, 'abstract.csv')
    f2 = open(file_name2, 'w+', encoding='utf8')
    for item in ll:
        f2.write("\033[0;31m%s\033[0m" % "===== Test (original text) =====")
        f2.write("\n")
        f2.write(str(item[0]))
        f2.write("\n")
        f2.write("\033[0;31m%s\033[0m" % "===== Summary =====")
        f2.write("\n")
        f2.write(str(item[1]))
        f2.write("\n")
    f2.close()
    # print("\033[0;31m%s\033[0m" % "===== Test =====")


if __name__ == '__main__':
    main()
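For comparison with the test above, here is a minimal, self-contained sketch of the textrank4zh API it exercises, run on a short made-up string instead of the pickled records. The sample text and the num/window values are illustrative, not taken from the original test.

    from textrank4zh import TextRank4Keyword, TextRank4Sentence

    text = "TextRank是一種基於圖的排序演算法。它可以用來抽取文字中的關鍵詞和關鍵句。"  # made-up sample text

    # key-sentence extraction, as used in textranktest1()
    tr4s = TextRank4Sentence()
    tr4s.analyze(text=text, lower=True, source='all_filters')
    for item in tr4s.get_key_sentences(num=2):
        print(item.index, item.weight, item.sentence)  # sentence position, TextRank score, sentence text

    # keyword extraction via the imported TextRank4Keyword class
    tr4w = TextRank4Keyword()
    tr4w.analyze(text=text, lower=True, window=2)
    for item in tr4w.get_keywords(10, word_min_len=2):
        print(item.word, item.weight)  # keyword and its TextRank score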
