一直听别人在说王晶导演的《倚天屠龙记之九阳神功》,出于兴趣爬取了豆瓣上的评论,做了一个词云图。
这次词云图的颜色取自这部电影的海报,下面是图片。
结果
import jieba
import wordcloud
import imageio as io
from imageio import imread
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator
# Tokenise the scraped comments in comments2.txt and build `text`, a single
# space-separated string of the words that should appear in the word cloud.

# Tokens that must not appear in the cloud: punctuation, a few digits and
# the space token. The original list contained ',' twice; the full-width
# forms of the ASCII punctuation are included as well, since Chinese
# comments mostly use those.
stopwords = {
    ',', '。', '【', '】', '”', '“', '《', '》', '!', '、', '?', '.', '…',
    '1', '2', '3', '4', '5', '[', ']', '(', ')', ' ',
    ',', '!', '?', '(', ')',
}

final_words_flt = []
# Binary mode is kept on purpose: the length check below counts bytes, and
# the raw bytes are handed to jieba.cut exactly as the original script did.
with open('comments2.txt', 'rb') as comment:
    for line in comment:
        # Lines of exactly 12 bytes are skipped, as in the original script
        # (presumably a fixed-size separator line -- TODO confirm).
        if len(line) == 12:
            continue
        for word in jieba.cut(line):
            # Drop whitespace-only tokens (spaces, line endings) and stop words
            # in a single pass instead of repeated join/split rounds.
            if not word.strip() or word in stopwords:
                continue
            final_words_flt.append(word)

text = ' '.join(final_words_flt)
# Render the word cloud from `text`, shaped and coloured by the poster image.

# font_path has to point to a font that can render the Chinese words.
# The original literal had lost its path separators ('C:WindowsFonts...'),
# which is not a valid absolute Windows path.
font = r'C:\Windows\Fonts\FZSTK.TTF'

# The poster image serves both as the mask and as the colour source.
bk = io.imread('2.jpeg')

# NOTE(review): according to the wordcloud documentation, width/height are
# ignored when a mask is supplied; they are kept here unchanged.
wc = WordCloud(
    collocations=False,
    mask=bk,
    font_path=font,
    width=1400,
    height=1400,
    margin=2,
).generate(text.lower())

# Pick each word's colour from the corresponding region of the poster.
image_colors = ImageColorGenerator(bk)

# Each image gets its own figure: drawing both on the same axes made the
# poster cover the word cloud in the displayed window.
plt.figure()
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis('off')

plt.figure()
plt.imshow(bk, cmap=plt.cm.gray)
plt.axis('off')

plt.show()

# recolor() above modified `wc` in place, so the saved file uses the poster colours.
wc.to_file('word_cloud.png')
需要注意的是:scipy.misc 里的 imread 在较新版本的 SciPy 中已被移除(自 1.2.0 起不可用),因此这里改用 imageio 的 imread 来读取图片。
爬取的评论:https://download.csdn.net/download/KIKI_ZSH/82328749



