栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 软件开发 > 后端开发 > Python

Node2Vec实战

Python 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

Node2Vec实战

Node2Vec实战 数据结构

每行是一对相连的节点(即一条边),例如:

1 2
2 3
4 5
主程序构建
# Load the edge list as a directed graph; an edge-data column, when present,
# is parsed as an integer 'weight' attribute.
G = nx.read_edgelist(
    '../data/text.txt',
    create_using=nx.DiGraph(),
    nodetype=None,
    data=[('weight', int)],
)

# Build the Node2Vec model on top of the graph.
model = Node2Vec(
    G,
    walk_length=10,
    num_walks=80,
    p=0.25,
    q=4,
    workers=1,
    use_rejection_sampling=0,
)

# Train the model, then fetch and print the learned node embeddings.
model.train(embed_size=4, window_size=5, iter=3)
embeddings = model.get_embeddings()
print(embeddings)

预处理:初始化节点到节点的转移概率
def preprocess_transition_probs(self):
    """
    Precompute the alias tables that guide the random walks.

    Reads ``self.G`` and ``self.is_directed`` and stores two dictionaries on
    the instance:

    * ``self.alias_nodes`` maps every node to the ``alias_setup`` result for
      the weights of its edges, normalised so they sum to 1.
    * ``self.alias_edges`` maps every ``(src, dst)`` pair to the
      ``get_alias_edge(src, dst)`` result, i.e. the second-order transition
      distribution out of ``dst`` given that the walk arrived from ``src``.
      When the graph is not directed, both orientations of each edge are
      stored.

    Every edge is expected to carry a ``'weight'`` attribute. A node whose
    edge weights sum to zero raises ``ZeroDivisionError``.

    Returns:
        None. The results are written to ``self.alias_nodes`` and
        ``self.alias_edges``.
    """
    G = self.G
    is_directed = self.is_directed

    alias_nodes = {}
    for node in G.nodes():
        # Edge weights towards each neighbour, in sorted neighbour order so
        # that table indices line up with sorted(G.neighbors(node)) later on.
        unnormalized_probs = [
            G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))
        ]
        # The sum of the weights is the normalising constant (denominator).
        norm_const = sum(unnormalized_probs)
        normalized_probs = [
            float(u_prob) / norm_const for u_prob in unnormalized_probs
        ]
        alias_nodes[node] = alias_setup(normalized_probs)

    alias_edges = {}
    for edge in G.edges():
        alias_edges[edge] = self.get_alias_edge(edge[0], edge[1])
        if not is_directed:
            # An undirected edge can be traversed both ways, so the reverse
            # orientation needs its own table.
            alias_edges[(edge[1], edge[0])] = self.get_alias_edge(edge[1], edge[0])

    self.alias_nodes = alias_nodes
    self.alias_edges = alias_edges

get_alias_edge:已知上一个节点 src 和当前节点 dst,计算从 dst 转移到其各个邻居节点的二阶随机游走概率

def get_alias_edge(self, src, dst):
    """
    Build the alias tables for the second-order step taken from ``dst``.

    ``src`` is the previous node of the walk and ``dst`` is the current one.
    Each neighbour of ``dst`` (visited in sorted order) gets an unnormalised
    score derived from the ``'weight'`` of the edge ``dst -> neighbour``:

    * weight / p  when the neighbour is ``src`` itself (stepping back),
    * weight      when the graph has an edge from the neighbour to ``src``,
    * weight / q  otherwise.

    The scores are normalised to sum to 1 and handed to ``alias_setup``,
    whose result is returned.
    """
    graph = self.G
    return_param = self.p
    in_out_param = self.q

    scores = []
    for candidate in sorted(graph.neighbors(dst)):
        edge_weight = graph[dst][candidate]['weight']
        if candidate == src:
            scores.append(edge_weight / return_param)
        elif graph.has_edge(candidate, src):
            scores.append(edge_weight)
        else:
            scores.append(edge_weight / in_out_param)

    total = sum(scores)
    return alias_setup([float(score) / total for score in scores])

alias_setup:输入归一化后的概率,得到 J、q 两个数组,供后面的 alias_draw 抽样时使用

def alias_setup(probs):
    """
    Build the lookup tables used by ``alias_draw`` for alias sampling.

    Args:
        probs: sequence of probabilities of a discrete distribution
            (expected to sum to 1).

    Returns:
        A tuple ``(J, q)`` of NumPy arrays, both of length ``len(probs)``.
        ``q[k]`` is the threshold against which a uniform draw is compared
        when slot ``k`` is picked; ``J[k]`` is the index returned instead
        when that comparison fails. Empty input gives two empty arrays.
    """
    K = len(probs)
    q = np.zeros(K)
    # The ``np.int`` alias was removed in NumPy 1.24; the builtin ``int``
    # selects the same dtype and works on every NumPy version.
    J = np.zeros(K, dtype=int)

    # Split the indices by whether their scaled probability is below 1.
    smaller = []
    larger = []
    for kk, prob in enumerate(probs):
        q[kk] = K * prob
        if q[kk] < 1.0:
            smaller.append(kk)
        else:
            larger.append(kk)

    # Pair each under-full slot with an over-full one: the large slot
    # donates the mass the small slot is missing, then is re-classified
    # according to what it has left.
    while smaller and larger:
        small = smaller.pop()
        large = larger.pop()

        J[small] = large
        q[large] = q[large] + q[small] - 1.0
        if q[large] < 1.0:
            smaller.append(large)
        else:
            larger.append(large)

    return J, q

alias_draw 抽样函数

def alias_draw(J, q):
    """
    Sample one index from a discrete distribution via its alias tables.

    ``J`` and ``q`` are the two equally long tables returned by
    ``alias_setup``. A slot is picked uniformly at random; a second uniform
    draw then decides between returning that slot (when the draw is below
    ``q[slot]``) and returning ``J[slot]``.
    """
    num_slots = len(J)

    # First random number chooses the slot, second one chooses between the
    # slot itself and the index stored for it in J.
    slot = int(np.floor(np.random.rand() * num_slots))
    keep_slot = np.random.rand() < q[slot]
    return slot if keep_slot else J[slot]

node2vec_walk:给定游走长度,从起始节点出发模拟一条随机游走路径;其中用到的函数均已在上文介绍

def node2vec_walk(self, walk_length, start_node):
    """
    Simulate a single random walk that begins at ``start_node``.

    Args:
        walk_length: maximum number of nodes in the returned walk.
        start_node: node the walk starts from.

    Returns:
        The list of visited nodes, starting with ``start_node``. It is
        shorter than ``walk_length`` when the walk reaches a node that has
        no neighbours, and always contains at least ``start_node``.

    The first step is drawn from ``self.alias_nodes[cur]``; every later
    step is drawn from ``self.alias_edges[(prev, cur)]``, so it depends on
    the previously visited node as well as the current one.
    """
    G = self.G
    alias_nodes = self.alias_nodes
    alias_edges = self.alias_edges

    walk = [start_node]
    while len(walk) < walk_length:
        cur = walk[-1]
        # Sorted so that the index drawn from the alias tables refers to
        # the same neighbour order that was used when they were built.
        cur_nbrs = sorted(G.neighbors(cur))
        if not cur_nbrs:
            # Nowhere to go from here: end the walk early.
            break

        if len(walk) == 1:
            # No previous node yet, so use the per-node tables.
            tables = alias_nodes[cur]
        else:
            # Second-order step: the tables are keyed by (previous, current).
            tables = alias_edges[(walk[-2], cur)]
        walk.append(cur_nbrs[alias_draw(tables[0], tables[1])])

    return walk
转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/887056.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号