wordcloud標準教程

2021-09-23 10:12:35 字數 1939 閱讀 9302

# -*- coding: utf-8 -*-

""""""

from os import path

from scipy.misc import imread

from wordcloud import wordcloud, stopwords

import matplotlib.pyplot as plt

def wordcount(filename):
    """Count how often each word occurs in a text file.

    Every line is expected to embed a bracketed, comma-separated word list,
    for example ``id,['foo', 'bar'],rest``.  Only the text between the first
    ``,[`` and the next ``],`` is used; lines that contain no ``,[`` are
    skipped.  Spaces and single quotes are removed from every word before it
    is counted.

    :param filename: path of the text file to read
    :return: list of ``(word, count)`` tuples in order of first appearance
    """
    counts = {}
    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as source:
        for line in source:
            pieces = line.split(',[')
            if len(pieces) == 1:
                # No ",[" marker, so this line carries no word list.
                continue
            words = pieces[1].split('],')[0].split(',')
            for word in words:
                word = word.replace(' ', '').replace("'", '')
                counts[word] = counts.get(word, 0) + 1
    return list(counts.items())

def generatecloud(filename, imagename, cloudname, fontname):
    """Render a word cloud from a text file and save it as an image.

    :param filename: text file whose word frequencies are computed with
        ``wordcount``
    :param imagename: image file used as the mask of the cloud
    :param cloudname: output path of the rendered cloud image
    :param fontname: path of the font file used to draw the words (use a
        font with Chinese glyphs for Chinese text)
    """
    # Imported locally under their real, case-sensitive names: the wordcloud
    # package exports ``WordCloud`` and ``STOPWORDS``.
    from wordcloud import STOPWORDS, WordCloud

    # NOTE(review): scipy.misc.imread is not available in recent SciPy
    # releases -- confirm the installed version before relying on it.
    coloring = imread(imagename)  # mask image

    wc = WordCloud(
        background_color="white",
        max_words=2000,  # maximum number of words shown in the cloud
        mask=coloring,
        stopwords=STOPWORDS,
        font_path=fontname,  # font able to render Chinese text
        max_font_size=150,
    )

    # wordcount returns [(word, count), ...]; generate_from_frequencies is
    # documented to take a word -> frequency mapping, so convert first.
    txtfreq = dict(wordcount(filename))
    wc.generate_from_frequencies(txtfreq)

    # Draw the cloud on the current figure without axes.
    plt.imshow(wc)
    plt.axis("off")

    # Opens a new figure, exactly as the original script did.
    plt.figure()

    # Save the rendered cloud to disk.
    wc.to_file(cloudname)

if __name__ == '__main__':
    # All inputs and the output live in the same directory as this script.
    here = path.dirname(__file__)

    # Order: source text, mask image, output image, Chinese-capable font.
    filename, imagename, cloudname, fontname = (
        path.join(here, leaf)
        for leaf in ('廣州.txt', "circle.jpg", "cloud.png", 'msyh.ttf')
    )

    generatecloud(filename, imagename, cloudname, fontname)

WordCloud基本演算法

wordcloud基本演算法:關於wordcloud的用處我就不多說了,在這裡我假定一個前提,然後在這個前提下來生成一個wordcloud。1. 要求生成的wordcloud占用的面積越小越好;2. 要求盡量是矩形。下面是我的大概演算法:2. 在可繪製區域隨機放入比重最大的字型,儲存當前的繪製區域。3. 檢...

wordcloud 引數 含義

font_path:string,字型路徑,需要展現什麼字型就把該字型路徑+字尾名寫上,如 font_path = '黑體.ttf';width:int (default=400),輸出的畫布寬度,預設為400畫素;height:int (default=200),輸出的畫布高度,預設為200畫素;prefer_h...

詞云分析wordcloud

jieba模組:用來切割中文的模組;pillow:python3中用來專門處理影象的模組。import re; import jieba; from PIL import Image; from wordcloud import WordCloud; import numpy as np; def gen_w...