diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ef45a58 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "TCGNN-bSpmm"] + path = TCGNN-bSpmm + url = git@github.com:YukeWang96/TCGNN-bSpmm.git +[submodule "TCGNN-tsparse"] + path = TCGNN-tsparse + url = git@github.com:YukeWang96/TCGNN-tsparse.git +[submodule "TCGNN-trition"] + path = TCGNN-trition + url = git@github.com:YukeWang96/TCGNN-trition.git diff --git a/3_cnt_TC_blk_SDDMM.py b/3_cnt_TC_blk_SDDMM.py new file mode 100755 index 0000000..d5bd9c0 --- /dev/null +++ b/3_cnt_TC_blk_SDDMM.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +import subprocess +import datetime +import os +from collections import defaultdict +import sys +import numpy as np +import math + +dense_tile_H = 16 +dense_tile_W = 16 + +dataset = [ + ('citeseer' , 3703 , 6 ), + ('cora' , 1433 , 7 ), + ('pubmed' , 500 , 3 ), + ('ppi' , 50 , 121 ), + + ('PROTEINS_full' , 29 , 2) , + ('OVCAR-8H' , 66 , 2) , + ('Yeast' , 74 , 2) , + ('DD' , 89 , 2) , + ('YeastH' , 75 , 2) , + + ( 'amazon0505' , 96 , 22), + ( 'artist' , 100 , 12), + ( 'com-amazon' , 96 , 22), + ( 'soc-BlogCatalog' , 128 , 39), + ( 'amazon0601' , 96 , 22), +] + + +data_dir = './tcgnn-ae-graphs/' +print("dataset,origin,reduced,reduction (%)") +fout = open("3_cnt_TC_blk_SDDMM.csv", "w") +fout.write("dataset,origin,reduced,reduction (%)\n") + +def find_dense(path, data): + nodes = set() + + graph = defaultdict(list) + graph_obj = np.load(path+'.npz', allow_pickle=True) + src_li = graph_obj['src_li'] + dst_li = graph_obj['dst_li'] + num_nodes = graph_obj['num_nodes'] + + for src, dst in zip(src_li, dst_li): + nodes.add(src) + nodes.add(dst) + graph[dst].append(src) + + tile_cnt = 0 + opt_cnt = 0 + chunk_edges = [] + for src_iter in range(0, num_nodes, dense_tile_H): + + dst_list = [] + for src in range(src_iter, src_iter + dense_tile_H): + dst_list += graph[src] + + actual_cnt = len(dst_list) + chunk_edges.append(len(dst_list)) + + range_set = 
sorted(list(set(dst_list))) + + # TC-GNN tiles + opt_cnt += (len(range_set) + dense_tile_W - 1)//dense_tile_W + tmp_opt_cnt = (len(range_set) + dense_tile_W - 1)//dense_tile_W + exp_opt_cnt = (dense_tile_H * dense_tile_W) * tmp_opt_cnt + + + # naive sliding window without compression. + tmp = 0 + range_set = sorted(list(range_set)) + i = j = 0 + while i < len(range_set) and j < len(range_set): + end = range_set[i] + dense_tile_W + while j < len(range_set) and range_set[j] < end: + j += 1 + i = j + tile_cnt += 1 + tmp += 1 + + exp_tile_cnt = (dense_tile_H * dense_tile_W) * tile_cnt + + if tmp < tmp_opt_cnt: + print(range_set) + print(tmp, tmp_opt_cnt) + print("tmp < tmp_opt_cnt Error Encounter, Duplicate Edges") + sys.exit(0) + + print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, \ + 100 * (tile_cnt - opt_cnt) / tile_cnt)) + + fout = open("3_cnt_TC_blk_SDDMM.csv", "a") + fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, 100 * (tile_cnt - opt_cnt) / tile_cnt)) + + +if __name__ == '__main__': + fout = open("3_cnt_TC_blk_SDDMM.csv", "w") + for data, d, c in dataset: + find_dense(data_dir + data, data) + fout.close() + print("\n\nCheck [3_cnt_TC_blk_SDDMM.csv] for results\n\n") \ No newline at end of file diff --git a/3_cnt_TC_blk_SpMM.py b/3_cnt_TC_blk_SpMM.py new file mode 100755 index 0000000..e998f2e --- /dev/null +++ b/3_cnt_TC_blk_SpMM.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +import subprocess +import datetime +import os +from collections import defaultdict +import sys +import numpy as np +import math + +dense_tile_H = 16 +dense_tile_W = 8 + +dataset = [ + ('citeseer' , 3703 , 6 ), + ('cora' , 1433 , 7 ), + ('pubmed' , 500 , 3 ), + ('ppi' , 50 , 121 ), + + ('PROTEINS_full' , 29 , 2) , + ('OVCAR-8H' , 66 , 2) , + ('Yeast' , 74 , 2) , + ('DD' , 89 , 2) , + ('YeastH' , 75 , 2) , + + ( 'amazon0505' , 96 , 22), + ( 'artist' , 100 , 12), + ( 'com-amazon' , 96 , 22), + ( 'soc-BlogCatalog' , 128 , 39), + ( 'amazon0601' , 96 , 22), +] + + +data_dir = 
'./tcgnn-ae-graphs/' +print("dataset,origin,reduced,reduction (%)") +fout = open("3_cnt_TC_blk_SpMM.csv", "w") +fout.write("dataset,origin,reduced,reduction (%)\n") + +def find_dense(path, data): + nodes = set() + + graph = defaultdict(list) + graph_obj = np.load(path+'.npz', allow_pickle=True) + src_li = graph_obj['src_li'] + dst_li = graph_obj['dst_li'] + num_nodes = graph_obj['num_nodes'] + + for src, dst in zip(src_li, dst_li): + nodes.add(src) + nodes.add(dst) + graph[dst].append(src) + + tile_cnt = 0 + opt_cnt = 0 + chunk_edges = [] + for src_iter in range(0, num_nodes, dense_tile_H): + + dst_list = [] + for src in range(src_iter, src_iter + dense_tile_H): + dst_list += graph[src] + + actual_cnt = len(dst_list) + chunk_edges.append(len(dst_list)) + + range_set = sorted(list(set(dst_list))) + + # TC-GNN tiles + opt_cnt += (len(range_set) + dense_tile_W - 1)//dense_tile_W + tmp_opt_cnt = (len(range_set) + dense_tile_W - 1)//dense_tile_W + exp_opt_cnt = (dense_tile_H * dense_tile_W) * tmp_opt_cnt + + + # naive sliding window without compression. 
+ tmp = 0 + range_set = sorted(list(range_set)) + i = j = 0 + while i < len(range_set) and j < len(range_set): + end = range_set[i] + dense_tile_W + while j < len(range_set) and range_set[j] < end: + j += 1 + i = j + tile_cnt += 1 + tmp += 1 + + exp_tile_cnt = (dense_tile_H * dense_tile_W) * tile_cnt + + if tmp < tmp_opt_cnt: + print(range_set) + print(tmp, tmp_opt_cnt) + print("tmp < tmp_opt_cnt Error Encounter, Duplicate Edges") + sys.exit(0) + + print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, 100 * (tile_cnt - opt_cnt) / tile_cnt)) + fout = open("3_cnt_TC_blk_SpMM.csv", "a") + fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, 100 * (tile_cnt - opt_cnt) / tile_cnt)) + + + +if __name__ == '__main__': + fout = open("3_cnt_TC_blk_SpMM.csv", "w") + for data, d, c in dataset: + find_dense(data_dir + data, data) + fout.close() + print("\n\nCheck [3_cnt_TC_blk_SpMM.csv] for results.\n\n") \ No newline at end of file diff --git a/README.md b/README.md index 6605a85..470f3ca 100644 --- a/README.md +++ b/README.md @@ -81,49 +81,52 @@ wget https://storage.googleapis.com/graph_dataset/tcgnn-ae-graphs.tar.gz tar -zxvf tcgnn-ae-graphs.tar.gz && rm -rf tcgnn-ae-graphs.tar.gz ``` -## Running **PyG** baseline. -> + Go to **`pyg_baseline/`** directory; -> + `./0_run_pyg.sh`to run all pyg experiments. -> + Check the results in **`1_bench_gcn.csv`** and **`1_bench_agnn.csv`**, which are similar as below. - -| dataset | Avg.Epoch (ms) | -|:-----------------|----------------:| -| citeseer | 10.149 | -| cora | 9.964 | -| pubmed | 10.114 | -| ppi | 13.419 | -| PROTEINS_full | 10.908 | -| OVCAR-8H | 72.636 | -| Yeast | 66.644 | -| DD | 18.972 | -| YeastH | 118.047 | -| amazon0505 | 29.731 | -| artist | 11.172 | -| com-amazon | 22.476 | -| soc-BlogCatalog | 14.971 | -| amazon0601 | 26.621 | - - - -## Running **DGL** baseline. +## Running **DGL** baseline (Fig-6a). > + Go to **`dgl_baseline/`** directory. > + `./0_run_dgl.sh`to run all dgl experiments. 
> + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`. - +## Running **PyG** baseline (Fig-6b). +> + Go to **`pyg_baseline/`** directory; +> + `./0_run_pyg.sh` to run all pyg experiments. +> + Check the results in **`1_bench_gcn.csv`** and **`1_bench_agnn.csv`**, which are similar as below. + ## Running **TC-GNN**. > + Go to project root directory. > + `./0_run_tcgnn.sh`to run all dgl experiments. > + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`. - +## Dense Tile Reduction (Fig-7). +``` +python 3_cnt_TC_blk_SDDMM.py +python 3_cnt_TC_blk_SpMM.py +``` ++ Check the results in `3_cnt_TC_blk_SDDMM.csv` and `3_cnt_TC_blk_SpMM.csv`. + + +## cuSPARSE-bSpMM Baseline (Fig-6c) +``` +cd TCGNN-bSpmm/cusparse +./0_run_bSpMM.sh +``` ++ Check the results in `1_run_bSpMM.csv`. + + +## tSparse Baseline (Table-5, column-2). +``` +cd TCGNN-tsparse/ +./0_run_tSparse.sh +``` ++ Check the results in `1_run_tSparse.csv`. + +## Triton Baseline (Table-5, column-3). +``` +cd TCGNN-trition/python/bench +./0_run_triton +``` ++ Check the results in `1_run_triton.csv`. + ## Reference. + [**Deep Graph Library**](https://github.com/dmlc/dgl)
diff --git a/TCGNN-bSpmm b/TCGNN-bSpmm new file mode 160000 index 0000000..6d72df1 --- /dev/null +++ b/TCGNN-bSpmm @@ -0,0 +1 @@ +Subproject commit 6d72df1cfcd96fc01a7dadfcfe79bce680f7342c diff --git a/TCGNN-trition b/TCGNN-trition new file mode 160000 index 0000000..cdc9bfa --- /dev/null +++ b/TCGNN-trition @@ -0,0 +1 @@ +Subproject commit cdc9bfa46556bb8f46b95b16b86b6bb05c038584 diff --git a/TCGNN-tsparse b/TCGNN-tsparse new file mode 160000 index 0000000..b0fbee1 --- /dev/null +++ b/TCGNN-tsparse @@ -0,0 +1 @@ +Subproject commit b0fbee12de357e02e9f1e5d26862be4d263fe198 diff --git a/count_TC_blocks.py b/count_TC_blocks.py deleted file mode 100755 index 37338b3..0000000 --- a/count_TC_blocks.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python3 -import subprocess -import datetime -import os -# import matplotlib.pyplot as plt -from collections import defaultdict -import sys -import numpy as np -import math - -dense_tile_H = 8 -dense_tile_W = 8 - -dataset = [ - # ('toy' , 3 , 2 ), - # ('tc_gnn_verify' , 16 , 2), - # ('tc_gnn_verify_2x' , 16 , 2), - - # ('citeseer' , 3703 , 6 ), - # ('cora' , 1433 , 7 ), - # ('pubmed' , 500 , 3 ), - # ('ppi' , 50 , 121 ), - - # ('PROTEINS_full' , 29 , 2) , - # ('OVCAR-8H' , 66 , 2) , - # ('Yeast' , 74 , 2) , - # ('DD' , 89 , 2) , - # ('YeastH' , 75 , 2) , - # ('SW-620H' , 66 , 2) , - - # ( 'amazon0505' , 96 , 22), - # ( 'artist' , 100 , 12), - # ( 'com-amazon' , 96 , 22), - ( 'soc-BlogCatalog' , 128 , 39), - ( 'amazon0601' , 96 , 22), - - - # ( 'web-BerkStan' , 100 , 12), - # ( 'Reddit' , 602 , 41), - - # ( 'wiki-topcats' , 300 , 12), - # ( 'COLLAB' , 100 , 3) , - # ( 'wiki-topcats' , 300 , 12), - # ( 'Reddit' , 602 , 41), - # ( 'enwiki-2013' , 100 , 12), - # ( 'amazon_also_bought' , 96 , 22), -] - - -data_dir = '/home/yuke/.graphs/orig/' -# print(data_dir) -# print("dataset,origin,origin_eff,reduced,reduced_eff,reduction (%)") - -def find_dense(path, data): - fp = open(path) - nodes = set() - - graph = defaultdict(list) - for 
line in fp: - src, dst = line.strip('\n').split(" ") - src, dst = int(src), int(dst) - nodes.add(src) - nodes.add(dst) - graph[dst].append(src) - num_nodes = max(nodes) - - - # blk_H = math.ceil(num_nodes/dense_tile_H) - # blk_W = math.ceil(num_nodes/dense_tile_W) - - # print(blk_H * blk_W) - # tiles = [0] * (blk_H * blk_W) - - # for src, dst in edges: - # blk_id_H = math.floor(src/dense_tile_H) - # blk_id_W = math.floor(dst/dense_tile_W) - # global_blk_idx = blk_id_H * blk_W + blk_id_W - # tiles[global_blk_idx] += 1 - tile_cnt = 0 - opt_cnt = 0 - chunk_edges = [] - for src_iter in range(0, num_nodes, dense_tile_H): - - dst_list = [] - for src in range(src_iter, src_iter + dense_tile_H): - dst_list += graph[src] - - actual_cnt = len(dst_list) - chunk_edges.append(len(dst_list)) - - range_set = sorted(list(set(dst_list))) - - # TC-GNN tiles - opt_cnt += (len(range_set) + dense_tile_W - 1)//dense_tile_W - tmp_opt_cnt = (len(range_set) + dense_tile_W - 1)//dense_tile_W - exp_opt_cnt = (dense_tile_H * dense_tile_W) * tmp_opt_cnt - - - # naive sliding window without compression. 
- tmp = 0 - range_set = sorted(list(range_set)) - i = j = 0 - while i < len(range_set) and j < len(range_set): - end = range_set[i] + dense_tile_W - while j < len(range_set) and range_set[j] < end: - j += 1 - i = j - tile_cnt += 1 - tmp += 1 - - exp_tile_cnt = (dense_tile_H * dense_tile_W) * tile_cnt - - if tmp < tmp_opt_cnt: - print(range_set) - print(tmp, tmp_opt_cnt) - print("tmp < tmp_opt_cnt Error Encounter, Duplicate Edges") - sys.exit(0) - - # print("{:10},Avg.Chunk.Size: {:.2f}".format(data, np.mean(chunk_edges))) - # print("{},{},{:.2f},{},{:.2f},{:.2f}".format(data, tile_cnt, \ - # actual_cnt/exp_tile_cnt, \ - # opt_cnt, actual_cnt/exp_opt_cnt, \ - # 100 * (tile_cnt - opt_cnt) / tile_cnt)) - - - naive_blockPerRow = math.ceil(tile_cnt/(num_nodes//dense_tile_H)) - tcgnn_blockPerRow = math.ceil(opt_cnt/(num_nodes//dense_tile_H)) - print("{},{},{}".format(data, naive_blockPerRow, tcgnn_blockPerRow)) - - # plt.hist(tiles, bins=100) - # plt.savefig("{}.pdf".format(data)) - # print(Counter(tiles)) - # return tiles -if __name__ == '__main__': - print("Dataset,Naive BPW,TC-GNN BPW") - for data, d, c in dataset: - find_dense(data_dir + data, data) \ No newline at end of file