Python 生成詞雲

2021-10-06 14:49:54 字數 4258 閱讀 7866

# -*- coding: utf-8 -*-
"""Build and display a word cloud from a short piece of Chinese text."""

import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud


# Generate the word cloud
def create_word_cloud(f):
    """Segment ``f`` with jieba and display the resulting word cloud.

    Args:
        f: The raw text to visualise.

    Returns:
        None. The cloud is rendered with matplotlib via ``plt.show()``.
    """
    print('根據詞頻計算詞云')
    # jieba.cut yields the segmented words; they are joined with single
    # spaces before being handed to WordCloud.
    text = " ".join(jieba.cut(f, cut_all=False, HMM=True))
    wc = WordCloud(
        font_path="./simhei.ttf",  # font file expected next to the script
        max_words=100,
        width=2000,
        height=1200,
    )
    wordcloud = wc.generate(text)
    # NOTE(review): the source has a "write the word cloud" comment followed
    # by a stray ")" at this point, so a statement that saved the image
    # (presumably ``wordcloud.to_file(...)``) appears to have been lost. The
    # output filename is unknown, so it is not reinstated here.
    # Display the word cloud
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()


# Sample text: the titles are concatenated without any separator, exactly as
# the backslash-continued literal in the source produced.
f = (
    '資料分析全景圖及修煉指南'
    '學習資料探勘的最佳學習路徑是什麼?'
    'python基礎語法:開始你的python之旅'
    'python科學計算:numpy'
    'python科學計算:pandas'
    '學習資料分析要掌握哪些基本概念?'
    '使用者畫像:標籤化就是資料的抽象能力'
    '資料採集:如何自動化採集資料?'
    '資料清洗:資料科學家80%時間都花費在了這裡?'
    '資料整合:這些大號一共20億粉絲?'
    '資料變換:大學成績要求正態分佈合理麼?'
    '資料視覺化:掌握資料領域的萬金油技能'
    '一次學會python資料視覺化的10種技能'
)

# The source defines the sample text but never passes it to the function;
# run the demo only when this file is executed as a script.
if __name__ == "__main__":
    create_word_cloud(f)

如果執行時出現錯誤,請把程式中的檔案路徑(例如 font_path 指定的字型檔)改成自己的路徑。

# -*- coding:utf-8 -*-

# 網易雲** 通過歌手id,生成該歌手的詞云

import os
import re
import sys

import jieba
import matplotlib.pyplot as plt
import numpy as np
import requests
from lxml import etree
from PIL import Image
from wordcloud import WordCloud

# HTTP request headers used by get_song_lyric() and get_songs().
# NOTE(review): only `headers =` survived in the source; the original value
# was lost. An empty dict keeps the script syntactically valid and lets
# requests use its defaults -- replace it with the headers the target site
# expects.
headers = {}

# Fetch the lyric of a single song
def get_song_lyric(headers, lyric_url):
    """Download one song's lyric and strip digits and ``: . [ ]`` from it.

    Args:
        headers: HTTP headers passed to the request.
        lyric_url: URL that returns the lyric as JSON.

    Returns:
        The value of ``['lrc']['lyric']`` with every digit, ':', '.', '['
        and ']' removed (presumably to drop ``[mm:ss.xx]`` timestamps --
        confirm against a real response), or ``''`` when the JSON body has
        no ``'lrc'`` key.
    """
    res = requests.request('get', lyric_url, headers=headers)
    payload = res.json()  # decode the body once instead of on every access
    if 'lrc' not in payload:
        # The original printed the response *after* `return ''`, so the
        # diagnostic could never run; emit it before returning.
        print(payload)
        return ''
    lyric = payload['lrc']['lyric']
    return re.sub(r'[\d:.[\]]', '', lyric)

# Remove stop words
def remove_stop_words(f):
    """Return ``f`` with every stop-word substring deleted.

    Matching is plain, case-sensitive substring replacement applied in the
    order listed below, so a stop word is also removed when it occurs inside
    a longer word (e.g. 'and').

    Args:
        f: The text to clean.

    Returns:
        The text with all stop-word occurrences removed.
    """
    stop_words = (
        '作詞',
        '作曲',
        '編曲',
        'arranger',
        '錄音',
        '混音',
        '人聲',
        'vocal',
        '弦樂',
        'keyboard',
        '鍵盤',
        '編輯',
        '助理',
        'assistants',
        'mixing',
        'editing',
        'recording',
        '**',
        '製作',
        'producer',
        '發行',
        'produced',
        'and',
        'distributed',
    )
    for stop_word in stop_words:
        f = f.replace(stop_word, '')
    return f

# Generate the word cloud
def create_word_cloud(f):
    """Remove stop words from ``f``, segment it and display its word cloud.

    Args:
        f: The raw text (here: concatenated lyrics) to visualise.

    Returns:
        None. The segmented text is printed and the cloud is rendered with
        matplotlib via ``plt.show()``.
    """
    print('根據詞頻,開始生成詞云!')
    f = remove_stop_words(f)
    # jieba.cut yields the segmented words; they are joined with single
    # spaces before being handed to WordCloud.
    cut_text = " ".join(jieba.cut(f, cut_all=False, HMM=True))
    wc = WordCloud(
        font_path="./wc.ttf",  # font file expected next to the script
        max_words=100,
        width=2000,
        height=1200,
    )
    print(cut_text)
    wordcloud = wc.generate(cut_text)
    # NOTE(review): the source has a "write the word cloud" comment followed
    # by a stray ")" at this point, so a statement that saved the image
    # (presumably ``wordcloud.to_file(...)``) appears to have been lost. The
    # output filename is unknown, so it is not reinstated here.
    # Display the word cloud
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()


# Get the ids and names of the top-50 hot songs on the given artist's page

def get_songs(artist_id):
    """Scrape the hot-song links and titles from an artist page.

    Args:
        artist_id: Artist id as a string; it is appended to the page URL.

    Returns:
        A ``(song_ids, song_names)`` tuple of two parallel lists.
    """
    # NOTE(review): the URL prefix is an empty string in the source (it looks
    # like the address was stripped when the article was published). Fill in
    # the artist-page URL before running.
    page_url = '' + artist_id
    # Fetch the page HTML
    res = requests.request('get', page_url, headers=headers)
    # Parse the top-50 hot songs with XPath
    html = etree.HTML(res.text)
    href_xpath = "//*[@id='hotsong-list']//a/@href"
    name_xpath = "//*[@id='hotsong-list']//a/text()"
    hrefs = html.xpath(href_xpath)
    names = html.xpath(name_xpath)
    # Collect the hot songs' ids and names
    song_ids = []
    song_names = []
    for href, name in zip(hrefs, names):
        # NOTE(review): the loop body was garbled in the source; only the
        # fragments "9", ":]" and ")" survived. The `[9:]` slice is
        # reconstructed from them -- presumably it drops a fixed-length
        # prefix of the href to leave the song id. Confirm against the page.
        song_ids.append(href[9:])
        song_names.append(name)
        print(href, ' ', name)
    return song_ids, song_names

# Set the artist id (12138269 is Mao Buyi)
artist_id = '12138269'
song_ids, song_names = get_songs(artist_id)

# Fetch the lyric of every song; each lyric is prefixed with one space, so
# the combined text starts with a space exactly as the original
# `all_word + ' ' + lyric` accumulation did.
lyric_parts = []
for song_id, song_name in zip(song_ids, song_names):
    # Lyric API URL
    # NOTE(review): the URL prefix is an empty string in the source (it looks
    # like the address was stripped when the article was published). Fill in
    # the lyric API endpoint before running.
    lyric_url = '' + song_id + '&lv=-1&kv=-1&tv=-1'
    lyric = get_song_lyric(headers, lyric_url)
    lyric_parts.append(' ' + lyric)
    print(song_name)

# All lyrics combined
all_word = ''.join(lyric_parts)

# Generate the word cloud from word frequencies
create_word_cloud(all_word)

Python 生成詞云

import matplotlib.pyplot as plt from wordcloud import wordcloud import jieba text from file with apath open python.txt encoding utf 8 read wordlist af...

python 生成詞云

1 知識點 wordcloud引數講解 font path表示用到字型的路徑 width和height表示畫布的寬和高 prefer horizontal可以調整詞雲中字型水平和垂直的多少 mask即掩膜,產生詞云背景的區域 scale 計算和繪圖之間的縮放 min font size設定最小的字型...

Python 生成詞云

import matplotlib.pyplot as plt from wordcloud import wordcloud import jieba text from file with apath open python.txt encoding utf 8 read wordlist af...