Won't there be data leakage? #13

Moshibing · 2024-04-12T10:09:07Z

def cora_generate_features(cora_data, test_size=1000):
    """Build 2-hop neighbourhood features and a train/test split of target nodes.

    Args:
        cora_data: Mapping with two entries that are read here:
            ``"nodes"`` maps a node id to an info mapping that has a
            ``"label"`` string; ``"links"`` is a mapping whose keys are
            ``(head_id, tail_id)`` pairs (its values are ignored).
        test_size: Number of shuffled nodes placed in the test split. The
            default of 1000 is the value that used to be hard-coded. If
            fewer nodes are available, all of them go to the test split and
            the train split is empty.

    Returns:
        A tuple ``(train_node_list, test_node_list, features)`` where
        ``features`` is a dict with the keys ``"node2adj_2hop_triples"``
        (node id -> set of ``(head, tail)`` edges within two hops),
        ``"node2adj_2hop_nodes"`` (node id -> set of node ids within two
        hops, including the node itself), ``"node2label"`` and ``"classes"``.

    Raises:
        ValueError: If ``test_size`` is negative.

    Note:
        The split only separates *target* nodes. The 2-hop subgraphs are
        built from the full graph before the split, and ``node2label`` holds
        the label of every node, so a training node's subgraph may contain
        test nodes. Callers that expose neighbour labels as context must
        mask the labels of test nodes, otherwise test labels leak into
        training.
    """
    if test_size < 0:
        raise ValueError("test_size must be non-negative")

    ### 1. Collect every node's label and the set of all classes.
    nodes = cora_data["nodes"]
    node_list = list(nodes)
    node2label = {}
    classes = set()
    for node_id, info in tqdm(nodes.items()):
        label = info["label"].replace("_", " ")
        node2label[node_id] = label
        classes.add(label)

    ### 2. Build the 2-hop subgraph of every node that has outgoing links.
    # 1-hop adjacency: head id -> list of tail ids, in link order.
    node2adj = {}
    for (head_id, tail_id), _ in tqdm(cora_data["links"].items()):
        node2adj.setdefault(head_id, []).append(tail_id)

    node2adj_2hop_triples = {}  # all edges inside each 2-hop subgraph
    node2adj_2hop_nodes = {}  # all nodes inside each 2-hop subgraph
    for node_id, adj in tqdm(node2adj.items()):
        edges = set()
        members = {node_id}
        for onehop_tail_id in adj:
            edges.add((node_id, onehop_tail_id))
            members.add(onehop_tail_id)
            # Edges that lead back to the centre node are kept on purpose:
            # they are genuine edges of the 2-hop subgraph.
            for twohop_tail_id in node2adj.get(onehop_tail_id, ()):
                edges.add((onehop_tail_id, twohop_tail_id))
                members.add(twohop_tail_id)
        node2adj_2hop_triples[node_id] = edges
        node2adj_2hop_nodes[node_id] = members

    ### 3. Split into train / test sets.
    # Drop nodes that never appear as the head of a link; they have no
    # subgraph to build features from.
    new_node_list = [
        node_id for node_id in node_list if node_id in node2adj_2hop_triples
    ]
    shuffle(new_node_list)
    test_node_list = new_node_list[:test_size]
    train_node_list = new_node_list[test_size:]
    print("train num: {}".format(len(train_node_list)))
    print("test num: {}".format(len(test_node_list)))
    return train_node_list, test_node_list, {
        "node2adj_2hop_triples": node2adj_2hop_triples,
        "node2adj_2hop_nodes": node2adj_2hop_nodes,
        "node2label": node2label,
        "classes": classes
    }
# Build the 2-hop features and the train/test split of target nodes.
# NOTE(review): cora_data is defined outside this snippet — presumably the loaded Cora graph; confirm.
cora_train_node_list, cora_test_node_list, cora_features = cora_generate_features(cora_data)

Splitting train_node_list and test_node_list this way guarantees that the target nodes do not overlap, but the neighboring nodes of a train node can still include test nodes, so the neighborhood of a train node will contain the category (label) information of test nodes.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Won't there be a data breach? #13

Won't there be a data breach? #13

Moshibing commented Apr 12, 2024

Won't there be a data breach? #13

Won't there be a data breach? #13

Comments

Moshibing commented Apr 12, 2024