中文文本分析-《习近平二十大讲话》
- 中文分词第三方库: pip install jieba -i https://pypi.tuna.tsinghua.edu.cn/simple
- 词云库:pip install wordcloud -i https://pypi.tuna.tsinghua.edu.cn/simple
要求:请修改完善以下代码,完成文本读取并生成个性化词云。
In [41]:
#导入需要的库
import jieba
import wordcloud as wc
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
# Read the source text. The context manager guarantees the handle is closed
# (the previous `f.close` lacked parentheses, so it never actually ran).
# NOTE(review): the file is opened with the platform default encoding, as
# before; pass encoding="utf-8" (or "gbk") explicitly once the file's real
# encoding is confirmed.
with open("习近平二十大讲话-节选.txt", "r") as f:
    report = f.read()
# print(report)

# Segment the Chinese text into a list of words.
wordlist = jieba.lcut(report)

# WordCloud tokenizes on whitespace/word boundaries, so re-join the
# segmented words with single spaces.
report = " ".join(wordlist)

# Load the mask image as an array; the cloud is drawn in the shape it defines.
with Image.open("heart.png") as mask_image:
    heart_mask = np.array(mask_image)

# Configure the word cloud.
ciyun = wc.WordCloud(
    background_color="white",
    max_words=200,
    # A font containing CJK glyphs is needed to render Chinese words.
    font_path='simsun.ttc',
    # Per the wordcloud docs, height/width are ignored when a mask is given
    # (the mask's shape is used instead); kept for when the mask is removed.
    height=1200,
    width=1600,
    max_font_size=400,
    # Fixed seed so the layout is reproducible between runs.
    random_state=10,
    mask=heart_mask,
)
ciyun.generate(report)

# Display the rendered cloud without axes.
plt.imshow(ciyun)
plt.axis('off')
plt.show()
In [ ]: