import jieba.posseg as pseg
import jieba
from tqdm import tqdm#设置一个进度条
# Remove common words that jieba would otherwise mis-tag as person names.
for noise_word in ('格斗', '明白', '段时间', '冷笑', '智慧', '徐徐'):
    jieba.del_word(noise_word)

# name -> total occurrence count
names = {}
# name -> {other_name: co-occurrence count}
relationships = {}
# one list of detected names per paragraph of the novel
lineNames = []
# Open the novel and collect person names per paragraph.
with open('天.txt', 'r', encoding='utf-8') as fp:
    for line in tqdm(fp):
        line = line.strip('\n')  # bugfix: was strip('n') (lost backslash)
        poss = pseg.cut(line)  # segment the line, returning (word, POS-flag) pairs
        # Start a fresh name list for this paragraph.
        lineNames.append([])
        for w in poss:
            # Only person names (POS flag 'nr') of length >= 2 are relevant.
            if w.flag != "nr" or len(w.word) < 2:
                continue
            lineNames[-1].append(w.word)  # record the name for this paragraph
            if names.get(w.word) is None:  # first time this name appears
                names[w.word] = 0
                relationships[w.word] = {}  # initialize this name's relation map
            # Count one more occurrence of this name.
            names[w.word] += 1
# bugfix: the label strings ended in a bare 'n' (lost '\n' backslash)
print('names\n', names)
print('relationships\n', relationships)
print('lineNames\n', lineNames)
# Analyze relationships: every pair of distinct names appearing in the
# same paragraph counts as one co-occurrence (counted in both directions).
for line in lineNames:
    for name1 in line:
        for name2 in line:
            if name1 == name2:
                continue
            if relationships[name1].get(name2) is None:
                # First co-occurrence of this pair: initialize the count.
                relationships[name1][name2] = 1
            else:
                # Pair seen again: bump the co-occurrence count.
                relationships[name1][name2] += 1
def generate_gephi():
    """Write Gephi-compatible CSV files from the global `names` and
    `relationships` dicts: one node file (character weights) and one
    edge file (co-occurrence weights > 3)."""
    # Node file: one row per character with its occurrence count.
    with open("earth_node.csv", "w", encoding='utf-8') as f:
        # bugfix: every header/row terminator was a bare "n" (lost '\n'
        # backslash), which produced a single-line, unparseable CSV.
        f.write("Id,Label,Weight\n")
        for name, times in names.items():
            f.write(name + "," + name + "," + str(times) + "\n")
    # Edge file: keep only pairs that co-occur more than 3 times.
    with open("earth_edge.csv", "w", encoding='utf-8') as f:
        f.write("Source,Target,Weight\n")
        for name, edge in relationships.items():
            for v, w in edge.items():
                if w > 3:
                    f.write(name + "," + v + "," + str(w) + "\n")
generate_gephi()
import pandas as pd
import numpy as np

# Load the node table (character weights) and drop incomplete rows.
file = pd.read_csv('earth_node.csv', encoding='utf-8')
file = file.dropna()
# Load the edge table (character relationships) and drop incomplete rows.
file1 = pd.read_csv('earth_edge.csv', encoding='utf-8')
file1 = file1.dropna()

# Main cast: the 25 most frequent characters, by descending weight.
file = file.sort_values('Weight', ascending=False)
namelist = file.loc[:, 'Id'].iloc[0:25].tolist()

# Re-order the main cast by first appearance as a Source in the edge file.
ordered = []
for _, edge_row in file1.iterrows():
    source = edge_row['Source']
    if source in namelist and source not in ordered:
        ordered.append(source)
namelist = ordered

# Node sizes: each character's weight from the node table, scaled by 0.5.
node_size = [
    np.array(file.loc[file['Id'] == name, 'Weight'])[0] * 0.5
    for name in namelist
]
import networkx as nx
from pylab import *
import matplotlib.pyplot as plt
# Use a Chinese-capable font and fix minus-sign rendering for the plot.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False


def painting():
    """Draw the character-closeness network from the global `file1` edge
    table, restricted to the main cast in the global `namelist`."""
    G = nx.Graph()
    # Add a node for every main-cast character that appears as a Source.
    for index, row in file1.iterrows():
        if row['Source'] in namelist:
            G.add_node(row['Source'])
    # Add weighted edges between two main-cast characters.
    for index, row in file1.iterrows():
        if (row['Source'] in namelist) and (row['Target'] in namelist):
            G.add_weighted_edges_from(
                [(row['Source'], row['Target'], 10 * np.array(row['Weight']))])
    pos = nx.shell_layout(G)
    print('画出网络图像:')
    # Edge widths scale with co-occurrence weight.
    wid = [float(d['weight'] * 0.003) for (u, v, d) in G.edges(data=True)]
    # Bugfix: node_color was hard-coded as range(25), which crashes whenever
    # the graph does not contain exactly 25 nodes; size it to the real count.
    # NOTE(review): node_size is built from namelist, which may differ from
    # G's node set (Targets never seen as Source) — verify lengths match.
    nx.draw(G, pos, with_labels=True, node_color=range(G.number_of_nodes()),
            edge_color='red', node_size=node_size, alpha=0.5, width=wid)
    plt.show()


painting()
# Adapted from: "NLP分析小说人物关系,找找主人公的真爱。" — 总裁余(余登武), CSDN blog



