You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
def cora_generate_features(cora_data, test_size=1000):
    """Collect node labels, build 2-hop neighbourhoods and split the nodes.

    Args:
        cora_data: Mapping with a ``"nodes"`` entry (node id -> info mapping
            that holds a ``"label"`` string) and a ``"links"`` entry whose
            keys are ``(head_id, tail_id)`` pairs.
        test_size: Number of shuffled nodes placed in the test split; all
            remaining nodes form the training split. Defaults to 1000, the
            value that was previously hard-coded.

    Returns:
        A ``(train_node_list, test_node_list, features)`` tuple. ``features``
        is a dict with the keys ``"node2adj_2hop_triples"`` (node id -> set of
        ``(head, tail)`` edge tuples within two hops), ``"node2adj_2hop_nodes"``
        (node id -> set of node ids within two hops, including the node
        itself), ``"node2label"`` (node id -> label with underscores replaced
        by spaces) and ``"classes"`` (set of all labels).

    Raises:
        ValueError: If ``test_size`` is negative.

    NOTE: the neighbourhoods are built from the complete link set before the
    split is made, and ``node2label`` covers every node. The split therefore
    only keeps the *target* nodes disjoint; the 2-hop subgraph of a training
    node can still contain test nodes.
    """
    if test_size < 0:
        raise ValueError("test_size must be non-negative")

    # 1. Record every node's label and gather the set of all classes.
    node_list = list(cora_data["nodes"].keys())
    node2label = {}
    classes = set()
    for node_id, info in tqdm(cora_data["nodes"].items()):
        label = info["label"].replace("_", " ")
        node2label[node_id] = label
        classes.add(label)

    # 2. Build the 2-hop subgraph of every node that has outgoing links.
    node2adj = {}  # 1-hop adjacency: head id -> list of tail ids
    for (head_id, tail_id), _ in tqdm(cora_data["links"].items()):
        node2adj.setdefault(head_id, []).append(tail_id)

    node2adj_2hop_triples = {}  # all edge tuples inside the 2-hop subgraph
    node2adj_2hop_nodes = {}  # all node ids inside the 2-hop subgraph
    for node_id, adj in tqdm(node2adj.items()):
        triples = set()
        nodes = {node_id}
        for onehop_tail_id in adj:
            triples.add((node_id, onehop_tail_id))
            nodes.add(onehop_tail_id)
            # Second-hop edges that lead back to node_id are not filtered out.
            for twohop_tail_id in node2adj.get(onehop_tail_id, ()):
                triples.add((onehop_tail_id, twohop_tail_id))
                nodes.add(twohop_tail_id)
        node2adj_2hop_triples[node_id] = triples
        node2adj_2hop_nodes[node_id] = nodes

    # 3. Split into train/test, dropping nodes that have no subgraph entry
    #    (i.e. nodes that never appear as the head of a link).
    new_node_list = [
        node_id for node_id in node_list if node_id in node2adj_2hop_triples
    ]
    shuffle(new_node_list)
    test_node_list = new_node_list[:test_size]
    train_node_list = new_node_list[test_size:]
    print("train num: {}".format(len(train_node_list)))
    print("test num: {}".format(len(test_node_list)))
    return train_node_list, test_node_list, {
        "node2adj_2hop_triples": node2adj_2hop_triples,
        "node2adj_2hop_nodes": node2adj_2hop_nodes,
        "node2label": node2label,
        "classes": classes,
    }
# Run feature generation on `cora_data` (defined outside this excerpt) and
# unpack the training node list, test node list and feature dictionary.
cora_train_node_list, cora_test_node_list, cora_features = cora_generate_features(cora_data)
Splitting the nodes into train_node_list and test_node_list this way guarantees that no target node appears in both sets, but the neighboring nodes of a train_node can still include test nodes, so the category information of those test_nodes is exposed during training.
The text was updated successfully, but these errors were encountered:
def cora_generate_features(cora_data, test_size=1000):
    """Collect node labels, build 2-hop neighbourhoods and split the nodes.

    Args:
        cora_data: Mapping with a ``"nodes"`` entry (node id -> info mapping
            that holds a ``"label"`` string) and a ``"links"`` entry whose
            keys are ``(head_id, tail_id)`` pairs.
        test_size: Number of shuffled nodes placed in the test split; all
            remaining nodes form the training split. Defaults to 1000, the
            value that was previously hard-coded.

    Returns:
        A ``(train_node_list, test_node_list, features)`` tuple. ``features``
        is a dict with the keys ``"node2adj_2hop_triples"`` (node id -> set of
        ``(head, tail)`` edge tuples within two hops), ``"node2adj_2hop_nodes"``
        (node id -> set of node ids within two hops, including the node
        itself), ``"node2label"`` (node id -> label with underscores replaced
        by spaces) and ``"classes"`` (set of all labels).

    Raises:
        ValueError: If ``test_size`` is negative.

    NOTE: the neighbourhoods are built from the complete link set before the
    split is made, and ``node2label`` covers every node. The split therefore
    only keeps the *target* nodes disjoint; the 2-hop subgraph of a training
    node can still contain test nodes.
    """
    if test_size < 0:
        raise ValueError("test_size must be non-negative")

    # 1. Record every node's label and gather the set of all classes.
    node_list = list(cora_data["nodes"].keys())
    node2label = {}
    classes = set()
    for node_id, info in tqdm(cora_data["nodes"].items()):
        label = info["label"].replace("_", " ")
        node2label[node_id] = label
        classes.add(label)

    # 2. Build the 2-hop subgraph of every node that has outgoing links.
    node2adj = {}  # 1-hop adjacency: head id -> list of tail ids
    for (head_id, tail_id), _ in tqdm(cora_data["links"].items()):
        node2adj.setdefault(head_id, []).append(tail_id)

    node2adj_2hop_triples = {}  # all edge tuples inside the 2-hop subgraph
    node2adj_2hop_nodes = {}  # all node ids inside the 2-hop subgraph
    for node_id, adj in tqdm(node2adj.items()):
        triples = set()
        nodes = {node_id}
        for onehop_tail_id in adj:
            triples.add((node_id, onehop_tail_id))
            nodes.add(onehop_tail_id)
            # Second-hop edges that lead back to node_id are not filtered out.
            for twohop_tail_id in node2adj.get(onehop_tail_id, ()):
                triples.add((onehop_tail_id, twohop_tail_id))
                nodes.add(twohop_tail_id)
        node2adj_2hop_triples[node_id] = triples
        node2adj_2hop_nodes[node_id] = nodes

    # 3. Split into train/test, dropping nodes that have no subgraph entry
    #    (i.e. nodes that never appear as the head of a link).
    new_node_list = [
        node_id for node_id in node_list if node_id in node2adj_2hop_triples
    ]
    shuffle(new_node_list)
    test_node_list = new_node_list[:test_size]
    train_node_list = new_node_list[test_size:]
    print("train num: {}".format(len(train_node_list)))
    print("test num: {}".format(len(test_node_list)))
    return train_node_list, test_node_list, {
        "node2adj_2hop_triples": node2adj_2hop_triples,
        "node2adj_2hop_nodes": node2adj_2hop_nodes,
        "node2label": node2label,
        "classes": classes,
    }
# Run feature generation on `cora_data` (defined outside this excerpt) and
# unpack the training node list, test node list and feature dictionary.
cora_train_node_list, cora_test_node_list, cora_features = cora_generate_features(cora_data)
Splitting the nodes into train_node_list and test_node_list this way guarantees that no target node appears in both sets, but the neighboring nodes of a train_node can still include test nodes, so the category information of those test_nodes is exposed during training.
The text was updated successfully, but these errors were encountered: