import pandas as pd
import jieba
from collections import Counter
import matplotlib.pyplot as plt
import matplotlib
from wordcloud import WordCloud

# Set a Chinese-capable font (macOS)
matplotlib.rcParams['font.sans-serif'] = ['Arial Unicode MS']
# Set a Chinese-capable font (Windows)
# matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['axes.unicode_minus'] = False  # render minus signs correctly

# Read the Excel file
df = pd.read_excel('智慧社区文本.xlsx')

# Load the stop-word list
with open('stopwords.txt', 'r', encoding='utf-8') as f:
    stop_words = set(line.strip() for line in f)

# Merge all text content into a single string
all_text = ' '.join(df['文本内容'].dropna())

# Segment the Chinese text with jieba
words = jieba.cut(all_text)

# Drop stop words and single-character tokens, then count frequencies
filtered_words = [word for word in words if word not in stop_words and len(word) > 1]
word_counts = Counter(filtered_words)

# Visualize the 20 most common words
most_common_words = word_counts.most_common(20)
top_words, counts = zip(*most_common_words)  # renamed from `words` to avoid shadowing the token stream above

plt.figure(figsize=(10, 8))
plt.barh(top_words, counts)
plt.xlabel('词频')      # "word frequency"
plt.title('词频分析')    # "word frequency analysis"
plt.gca().invert_yaxis()  # invert the y-axis so the most frequent word is on top
plt.savefig('词频分析.png')
plt.show()

# Generate the word cloud
wordcloud = WordCloud(font_path='苹方-简.ttf', width=800, height=600,
                      background_color='white').generate_from_frequencies(word_counts)

plt.figure(figsize=(10, 8))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the axes
plt.title('词云图')  # "word cloud"
plt.savefig('词云图.png')
plt.show()
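
# Optional extensions (a sketch, not part of the original script):
#
# 1) jieba's default dictionary may split domain terms such as '智慧社区' into
#    separate tokens. If you keep a custom dictionary (assumed filename
#    'userdict.txt', one term per line, optionally followed by a frequency and
#    POS tag), load it *before* calling jieba.cut so those terms stay whole:
#        jieba.load_userdict('userdict.txt')
#
# 2) Persist the full frequency table for later inspection. The output filename
#    '词频统计.csv' is an assumption; utf-8-sig keeps the CSV readable in Excel.
freq_df = pd.DataFrame(word_counts.most_common(), columns=['word', 'count'])
freq_df.to_csv('词频统计.csv', index=False, encoding='utf-8-sig')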