update with README
Yuke Wang committed May 7, 2023
1 parent 26b03f5 commit 0d40b53
Showing 8 changed files with 255 additions and 170 deletions.
9 changes: 9 additions & 0 deletions .gitmodules
@@ -0,0 +1,9 @@
[submodule "TCGNN-bSpmm"]
path = TCGNN-bSpmm
url = git@github.com:YukeWang96/TCGNN-bSpmm.git
[submodule "TCGNN-tsparse"]
path = TCGNN-tsparse
url = git@github.com:YukeWang96/TCGNN-tsparse.git
[submodule "TCGNN-trition"]
path = TCGNN-trition
url = git@github.com:YukeWang96/TCGNN-trition.git
104 changes: 104 additions & 0 deletions 3_cnt_TC_blk_SDDMM.py
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
from collections import defaultdict
import sys

import numpy as np

dense_tile_H = 16
dense_tile_W = 16

# (name, feature dim, #classes); only the name is used below.
dataset = [
    ('citeseer',        3703, 6),
    ('cora',            1433, 7),
    ('pubmed',           500, 3),
    ('ppi',               50, 121),

    ('PROTEINS_full',     29, 2),
    ('OVCAR-8H',          66, 2),
    ('Yeast',             74, 2),
    ('DD',                89, 2),
    ('YeastH',            75, 2),

    ('amazon0505',        96, 22),
    ('artist',           100, 12),
    ('com-amazon',        96, 22),
    ('soc-BlogCatalog',  128, 39),
    ('amazon0601',        96, 22),
]

data_dir = './tcgnn-ae-graphs/'


def find_dense(path, data, fout):
    # Build an adjacency list from the edge arrays stored in the .npz file.
    graph = defaultdict(list)
    graph_obj = np.load(path + '.npz', allow_pickle=True)
    src_li = graph_obj['src_li']
    dst_li = graph_obj['dst_li']
    num_nodes = int(graph_obj['num_nodes'])  # stored as a scalar array.

    for src, dst in zip(src_li, dst_li):
        graph[dst].append(src)

    tile_cnt = 0  # tiles used by the naive sliding window.
    opt_cnt = 0   # tiles used after TC-GNN column condensing.
    for src_iter in range(0, num_nodes, dense_tile_H):
        # Gather every neighbor column touched by this 16-row window.
        dst_list = []
        for src in range(src_iter, src_iter + dense_tile_H):
            dst_list += graph[src]

        range_set = sorted(set(dst_list))

        # TC-GNN tiles: unique columns packed contiguously into
        # ceil(len / W) tiles of width dense_tile_W.
        tmp_opt_cnt = (len(range_set) + dense_tile_W - 1) // dense_tile_W
        opt_cnt += tmp_opt_cnt

        # Naive sliding window without compression: open a new width-W
        # window at the first column not yet covered.
        tmp = 0
        i = j = 0
        while i < len(range_set) and j < len(range_set):
            end = range_set[i] + dense_tile_W
            while j < len(range_set) and range_set[j] < end:
                j += 1
            i = j
            tile_cnt += 1
            tmp += 1

        # Condensing can never need more tiles than the sliding window;
        # if it does, the input must contain duplicate edges.
        if tmp < tmp_opt_cnt:
            print(range_set)
            print(tmp, tmp_opt_cnt)
            print("Error: tmp < tmp_opt_cnt -- duplicate edges encountered")
            sys.exit(1)

    reduction = 100 * (tile_cnt - opt_cnt) / tile_cnt
    print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, reduction))
    fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, reduction))


if __name__ == '__main__':
    print("dataset,origin,reduced,reduction (%)")
    with open("3_cnt_TC_blk_SDDMM.csv", "w") as fout:
        fout.write("dataset,origin,reduced,reduction (%)\n")
        for data, _, _ in dataset:
            find_dense(data_dir + data, data, fout)
    print("\n\nCheck [3_cnt_TC_blk_SDDMM.csv] for results\n\n")
103 changes: 103 additions & 0 deletions 3_cnt_TC_blk_SpMM.py
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
from collections import defaultdict
import sys

import numpy as np

dense_tile_H = 16
dense_tile_W = 8

# (name, feature dim, #classes); only the name is used below.
dataset = [
    ('citeseer',        3703, 6),
    ('cora',            1433, 7),
    ('pubmed',           500, 3),
    ('ppi',               50, 121),

    ('PROTEINS_full',     29, 2),
    ('OVCAR-8H',          66, 2),
    ('Yeast',             74, 2),
    ('DD',                89, 2),
    ('YeastH',            75, 2),

    ('amazon0505',        96, 22),
    ('artist',           100, 12),
    ('com-amazon',        96, 22),
    ('soc-BlogCatalog',  128, 39),
    ('amazon0601',        96, 22),
]

data_dir = './tcgnn-ae-graphs/'


def find_dense(path, data, fout):
    # Build an adjacency list from the edge arrays stored in the .npz file.
    graph = defaultdict(list)
    graph_obj = np.load(path + '.npz', allow_pickle=True)
    src_li = graph_obj['src_li']
    dst_li = graph_obj['dst_li']
    num_nodes = int(graph_obj['num_nodes'])  # stored as a scalar array.

    for src, dst in zip(src_li, dst_li):
        graph[dst].append(src)

    tile_cnt = 0  # tiles used by the naive sliding window.
    opt_cnt = 0   # tiles used after TC-GNN column condensing.
    for src_iter in range(0, num_nodes, dense_tile_H):
        # Gather every neighbor column touched by this 16-row window.
        dst_list = []
        for src in range(src_iter, src_iter + dense_tile_H):
            dst_list += graph[src]

        range_set = sorted(set(dst_list))

        # TC-GNN tiles: unique columns packed contiguously into
        # ceil(len / W) tiles of width dense_tile_W.
        tmp_opt_cnt = (len(range_set) + dense_tile_W - 1) // dense_tile_W
        opt_cnt += tmp_opt_cnt

        # Naive sliding window without compression: open a new width-W
        # window at the first column not yet covered.
        tmp = 0
        i = j = 0
        while i < len(range_set) and j < len(range_set):
            end = range_set[i] + dense_tile_W
            while j < len(range_set) and range_set[j] < end:
                j += 1
            i = j
            tile_cnt += 1
            tmp += 1

        # Condensing can never need more tiles than the sliding window;
        # if it does, the input must contain duplicate edges.
        if tmp < tmp_opt_cnt:
            print(range_set)
            print(tmp, tmp_opt_cnt)
            print("Error: tmp < tmp_opt_cnt -- duplicate edges encountered")
            sys.exit(1)

    reduction = 100 * (tile_cnt - opt_cnt) / tile_cnt
    print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, reduction))
    fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, reduction))


if __name__ == '__main__':
    print("dataset,origin,reduced,reduction (%)")
    with open("3_cnt_TC_blk_SpMM.csv", "w") as fout:
        fout.write("dataset,origin,reduced,reduction (%)\n")
        for data, _, _ in dataset:
            find_dense(data_dir + data, data, fout)
    print("\n\nCheck [3_cnt_TC_blk_SpMM.csv] for results.\n\n")
69 changes: 36 additions & 33 deletions README.md
@@ -81,49 +81,52 @@
wget https://storage.googleapis.com/graph_dataset/tcgnn-ae-graphs.tar.gz
tar -zxvf tcgnn-ae-graphs.tar.gz && rm -rf tcgnn-ae-graphs.tar.gz
```

## Running **DGL** baseline (Fig-6a).
> + Go to the **`dgl_baseline/`** directory.
> + Run `./0_run_dgl.sh` to run all DGL experiments.
> + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`.
<!-- > + Pass the `--model` parameter in `dgl_main.py` with `gcn` and `gin` to profile the example GCN and GIN model, respectively;
> + `./0_bench.py | tee run_dgl.log` to run the script and report the 10-epoch runtime for all evaluated datasets.
> + `./1_log2csv.py` to convert `run_dgl.log` to `run_dgl.csv` for ease of visualization. -->

## Running **PyG** baseline (Fig-6b).
> + Go to the **`pyg_baseline/`** directory.
> + Run `./0_run_pyg.sh` to run all PyG experiments.
> + Check the results in **`1_bench_gcn.csv`** and **`1_bench_agnn.csv`**, which should be similar to the table below.

| dataset          | Avg.Epoch (ms) |
|:-----------------|---------------:|
| citeseer         |         10.149 |
| cora             |          9.964 |
| pubmed           |         10.114 |
| ppi              |         13.419 |
| PROTEINS_full    |         10.908 |
| OVCAR-8H         |         72.636 |
| Yeast            |         66.644 |
| DD               |         18.972 |
| YeastH           |        118.047 |
| amazon0505       |         29.731 |
| artist           |         11.172 |
| com-amazon       |         22.476 |
| soc-BlogCatalog  |         14.971 |
| amazon0601       |         26.621 |

<!-- > + Change `run_GCN=True` or `run_GCN=False` in `0_bench.py` with `gcn` and `gin` to profile the example GCN and GIN model, respectively;
> + `./0_bench.py | tee run_pyg.log` to run the script and report the 10-epoch runtime for all evaluated datasets.
> + `./1_log2csv.py` to convert `run_pyg.log` to `run_pyg.csv` for ease of analysis. -->

## Running **TC-GNN**.
> + Go to the project root directory.
> + Run `./0_run_tcgnn.sh` to run all TC-GNN experiments.
> + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`.
<!-- > + Under the current project directory:
> + `./0_bench.py | tee run_TCGNN.log` to run the script and report the 10-epoch runtime for all evaluated datasets.
> + `./1_log2csv.py` to convert `run_TCGNN.log` to `run_TCGNN.csv` for ease of analysis. -->

## Dense Tile Reduction (Fig-7).
```
python 3_cnt_TC_blk_SDDMM.py
python 3_cnt_TC_blk_SpMM.py
```
+ Check the results in `3_cnt_TC_blk_SDDMM.csv` and `3_cnt_TC_blk_SpMM.csv`. Each row reports the naive sliding-window tile count (`origin`), the TC-GNN tile count after column condensing (`reduced`), and the percentage reduction.
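
The optional helper below is not part of the repository; it is a small sketch, assuming `pandas` is installed, for viewing the SDDMM and SpMM reductions side by side:

```python
import pandas as pd

# Results written by the two counting scripts above.
sddmm = pd.read_csv("3_cnt_TC_blk_SDDMM.csv")
spmm = pd.read_csv("3_cnt_TC_blk_SpMM.csv")

# One row per dataset, with SDDMM and SpMM columns side by side.
merged = sddmm.merge(spmm, on="dataset", suffixes=("_sddmm", "_spmm"))
print(merged.to_string(index=False))
```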


## cuSPARSE-bSpMM Baseline (Fig-6c)
```
cd TCGNN-bSpmm/cusparse
./0_run_bSpMM.sh
```
+ Check the results in `1_run_bSpMM.csv`.


## tSparse Baseline (Table-5, column-2).
```
cd TCGNN-tsparse/
./0_run_tSparse.sh
```
+ Check the results in `1_run_tSparse.csv`.

## Triton Baseline (Table-5, column-3).
```
cd TCGNN-trition/python/bench
./0_run_triton
```
+ Check the results in `1_run_triton.csv`.


## Reference.
+ [**Deep Graph Library**](https://github.com/dmlc/dgl) <br>
1 change: 1 addition & 0 deletions TCGNN-bSpmm
Submodule TCGNN-bSpmm added at 6d72df
1 change: 1 addition & 0 deletions TCGNN-trition
Submodule TCGNN-trition added at cdc9bf
1 change: 1 addition & 0 deletions TCGNN-tsparse
Submodule TCGNN-tsparse added at b0fbee
