diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..ef45a58 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "TCGNN-bSpmm"] + path = TCGNN-bSpmm + url = git@github.com:YukeWang96/TCGNN-bSpmm.git +[submodule "TCGNN-tsparse"] + path = TCGNN-tsparse + url = git@github.com:YukeWang96/TCGNN-tsparse.git +[submodule "TCGNN-trition"] + path = TCGNN-trition + url = git@github.com:YukeWang96/TCGNN-trition.git diff --git a/3_cnt_TC_blk_SDDMM.py b/3_cnt_TC_blk_SDDMM.py new file mode 100755 index 0000000..d5bd9c0 --- /dev/null +++ b/3_cnt_TC_blk_SDDMM.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +import subprocess +import datetime +import os +from collections import defaultdict +import sys +import numpy as np +import math + +dense_tile_H = 16 +dense_tile_W = 16 + +dataset = [ + ('citeseer' , 3703 , 6 ), + ('cora' , 1433 , 7 ), + ('pubmed' , 500 , 3 ), + ('ppi' , 50 , 121 ), + + ('PROTEINS_full' , 29 , 2) , + ('OVCAR-8H' , 66 , 2) , + ('Yeast' , 74 , 2) , + ('DD' , 89 , 2) , + ('YeastH' , 75 , 2) , + + ( 'amazon0505' , 96 , 22), + ( 'artist' , 100 , 12), + ( 'com-amazon' , 96 , 22), + ( 'soc-BlogCatalog' , 128 , 39), + ( 'amazon0601' , 96 , 22), +] + + +data_dir = './tcgnn-ae-graphs/' +print("dataset,origin,reduced,reduction (%)") +fout = open("3_cnt_TC_blk_SDDMM.csv", "w") +fout.write("dataset,origin,reduced,reduction (%)\n") + +def find_dense(path, data): + nodes = set() + + graph = defaultdict(list) + graph_obj = np.load(path+'.npz', allow_pickle=True) + src_li = graph_obj['src_li'] + dst_li = graph_obj['dst_li'] + num_nodes = graph_obj['num_nodes'] + + for src, dst in zip(src_li, dst_li): + nodes.add(src) + nodes.add(dst) + graph[dst].append(src) + + tile_cnt = 0 + opt_cnt = 0 + chunk_edges = [] + for src_iter in range(0, num_nodes, dense_tile_H): + + dst_list = [] + for src in range(src_iter, src_iter + dense_tile_H): + dst_list += graph[src] + + actual_cnt = len(dst_list) + chunk_edges.append(len(dst_list)) + + range_set = 
sorted(list(set(dst_list))) + + # TC-GNN tiles + opt_cnt += (len(range_set) + dense_tile_W - 1)//dense_tile_W + tmp_opt_cnt = (len(range_set) + dense_tile_W - 1)//dense_tile_W + exp_opt_cnt = (dense_tile_H * dense_tile_W) * tmp_opt_cnt + + + # naive sliding window without compression. + tmp = 0 + range_set = sorted(list(range_set)) + i = j = 0 + while i < len(range_set) and j < len(range_set): + end = range_set[i] + dense_tile_W + while j < len(range_set) and range_set[j] < end: + j += 1 + i = j + tile_cnt += 1 + tmp += 1 + + exp_tile_cnt = (dense_tile_H * dense_tile_W) * tile_cnt + + if tmp < tmp_opt_cnt: + print(range_set) + print(tmp, tmp_opt_cnt) + print("tmp < tmp_opt_cnt Error Encounter, Duplicate Edges") + sys.exit(0) + + print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, \ + 100 * (tile_cnt - opt_cnt) / tile_cnt)) + + fout = open("3_cnt_TC_blk_SDDMM.csv", "a") + fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, 100 * (tile_cnt - opt_cnt) / tile_cnt)) + + +if __name__ == '__main__': + fout = open("3_cnt_TC_blk_SDDMM.csv", "w") + for data, d, c in dataset: + find_dense(data_dir + data, data) + fout.close() + print("\n\nCheck [3_cnt_TC_blk_SDDMM.csv] for results\n\n") \ No newline at end of file diff --git a/3_cnt_TC_blk_SpMM.py b/3_cnt_TC_blk_SpMM.py new file mode 100755 index 0000000..e998f2e --- /dev/null +++ b/3_cnt_TC_blk_SpMM.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +import subprocess +import datetime +import os +from collections import defaultdict +import sys +import numpy as np +import math + +dense_tile_H = 16 +dense_tile_W = 8 + +dataset = [ + ('citeseer' , 3703 , 6 ), + ('cora' , 1433 , 7 ), + ('pubmed' , 500 , 3 ), + ('ppi' , 50 , 121 ), + + ('PROTEINS_full' , 29 , 2) , + ('OVCAR-8H' , 66 , 2) , + ('Yeast' , 74 , 2) , + ('DD' , 89 , 2) , + ('YeastH' , 75 , 2) , + + ( 'amazon0505' , 96 , 22), + ( 'artist' , 100 , 12), + ( 'com-amazon' , 96 , 22), + ( 'soc-BlogCatalog' , 128 , 39), + ( 'amazon0601' , 96 , 22), +] + + +data_dir = 
'./tcgnn-ae-graphs/' +print("dataset,origin,reduced,reduction (%)") +fout = open("3_cnt_TC_blk_SpMM.csv", "w") +fout.write("dataset,origin,reduced,reduction (%)\n") + +def find_dense(path, data): + nodes = set() + + graph = defaultdict(list) + graph_obj = np.load(path+'.npz', allow_pickle=True) + src_li = graph_obj['src_li'] + dst_li = graph_obj['dst_li'] + num_nodes = graph_obj['num_nodes'] + + for src, dst in zip(src_li, dst_li): + nodes.add(src) + nodes.add(dst) + graph[dst].append(src) + + tile_cnt = 0 + opt_cnt = 0 + chunk_edges = [] + for src_iter in range(0, num_nodes, dense_tile_H): + + dst_list = [] + for src in range(src_iter, src_iter + dense_tile_H): + dst_list += graph[src] + + actual_cnt = len(dst_list) + chunk_edges.append(len(dst_list)) + + range_set = sorted(list(set(dst_list))) + + # TC-GNN tiles + opt_cnt += (len(range_set) + dense_tile_W - 1)//dense_tile_W + tmp_opt_cnt = (len(range_set) + dense_tile_W - 1)//dense_tile_W + exp_opt_cnt = (dense_tile_H * dense_tile_W) * tmp_opt_cnt + + + # naive sliding window without compression. 
+ tmp = 0 + range_set = sorted(list(range_set)) + i = j = 0 + while i < len(range_set) and j < len(range_set): + end = range_set[i] + dense_tile_W + while j < len(range_set) and range_set[j] < end: + j += 1 + i = j + tile_cnt += 1 + tmp += 1 + + exp_tile_cnt = (dense_tile_H * dense_tile_W) * tile_cnt + + if tmp < tmp_opt_cnt: + print(range_set) + print(tmp, tmp_opt_cnt) + print("tmp < tmp_opt_cnt Error Encounter, Duplicate Edges") + sys.exit(0) + + print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, 100 * (tile_cnt - opt_cnt) / tile_cnt)) + fout = open("3_cnt_TC_blk_SpMM.csv", "a") + fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, 100 * (tile_cnt - opt_cnt) / tile_cnt)) + + + +if __name__ == '__main__': + fout = open("3_cnt_TC_blk_SpMM.csv", "w") + for data, d, c in dataset: + find_dense(data_dir + data, data) + fout.close() + print("\n\nCheck [3_cnt_TC_blk_SpMM.csv] for results.\n\n") \ No newline at end of file diff --git a/README.md b/README.md index 6605a85..470f3ca 100644 --- a/README.md +++ b/README.md @@ -81,49 +81,52 @@ wget https://storage.googleapis.com/graph_dataset/tcgnn-ae-graphs.tar.gz tar -zxvf tcgnn-ae-graphs.tar.gz && rm -rf tcgnn-ae-graphs.tar.gz ``` -## Running **PyG** baseline. -> + Go to **`pyg_baseline/`** directory; -> + `./0_run_pyg.sh`to run all pyg experiments. -> + Check the results in **`1_bench_gcn.csv`** and **`1_bench_agnn.csv`**, which are similar as below. - -| dataset | Avg.Epoch (ms) | -|:-----------------|----------------:| -| citeseer | 10.149 | -| cora | 9.964 | -| pubmed | 10.114 | -| ppi | 13.419 | -| PROTEINS_full | 10.908 | -| OVCAR-8H | 72.636 | -| Yeast | 66.644 | -| DD | 18.972 | -| YeastH | 118.047 | -| amazon0505 | 29.731 | -| artist | 11.172 | -| com-amazon | 22.476 | -| soc-BlogCatalog | 14.971 | -| amazon0601 | 26.621 | - - - -## Running **DGL** baseline. +## Running **DGL** baseline (Fig-6a). > + Go to **`dgl_baseline/`** directory. > + `./0_run_dgl.sh`to run all dgl experiments. 
> + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`. - +## Running **PyG** baseline (Fig-6b). +> + Go to **`pyg_baseline/`** directory; +> + `./0_run_pyg.sh` to run all pyg experiments. +> + Check the results in **`1_bench_gcn.csv`** and **`1_bench_agnn.csv`**, which are similar as below. + ## Running **TC-GNN**. > + Go to project root directory. > + `./0_run_tcgnn.sh`to run all dgl experiments. > + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`. - +## Dense Tile Reduction (Fig-7). +``` +python 3_cnt_TC_blk_SDDMM.py +python 3_cnt_TC_blk_SpMM.py +``` ++ Check the results in `3_cnt_TC_blk_SDDMM.csv` and `3_cnt_TC_blk_SpMM.csv`. + + +## cuSPARSE-bSpMM Baseline (Fig-6c) +``` +cd TCGNN-bSpmm/cusparse +./0_run_bSpMM.sh +``` ++ Check the results in `1_run_bSpMM.csv`. + + +## tSparse Baseline (Table-5, column-2). +``` +cd TCGNN-tsparse/ +./0_run_tSparse.sh +``` ++ Check the results in `1_run_tSparse.csv`. + +## Triton Baseline (Table-5, column-3). +``` +cd TCGNN-trition/python/bench +./0_run_triton +``` ++ Check the results in `1_run_triton.csv`. + ## Reference. + [**Deep Graph Library**](https://github.com/dmlc/dgl)
diff --git a/TCGNN-bSpmm b/TCGNN-bSpmm new file mode 160000 index 0000000..6d72df1 --- /dev/null +++ b/TCGNN-bSpmm @@ -0,0 +1 @@ +Subproject commit 6d72df1cfcd96fc01a7dadfcfe79bce680f7342c diff --git a/TCGNN-trition b/TCGNN-trition new file mode 160000 index 0000000..cdc9bfa --- /dev/null +++ b/TCGNN-trition @@ -0,0 +1 @@ +Subproject commit cdc9bfa46556bb8f46b95b16b86b6bb05c038584 diff --git a/TCGNN-tsparse b/TCGNN-tsparse new file mode 160000 index 0000000..b0fbee1 --- /dev/null +++ b/TCGNN-tsparse @@ -0,0 +1 @@ +Subproject commit b0fbee12de357e02e9f1e5d26862be4d263fe198 diff --git a/count_TC_blocks.py b/count_TC_blocks.py deleted file mode 100755 index 37338b3..0000000 --- a/count_TC_blocks.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python3 -import subprocess -import datetime -import os -# import matplotlib.pyplot as plt -from collections import defaultdict -import sys -import numpy as np -import math - -dense_tile_H = 8 -dense_tile_W = 8 - -dataset = [ - # ('toy' , 3 , 2 ), - # ('tc_gnn_verify' , 16 , 2), - # ('tc_gnn_verify_2x' , 16 , 2), - - # ('citeseer' , 3703 , 6 ), - # ('cora' , 1433 , 7 ), - # ('pubmed' , 500 , 3 ), - # ('ppi' , 50 , 121 ), - - # ('PROTEINS_full' , 29 , 2) , - # ('OVCAR-8H' , 66 , 2) , - # ('Yeast' , 74 , 2) , - # ('DD' , 89 , 2) , - # ('YeastH' , 75 , 2) , - # ('SW-620H' , 66 , 2) , - - # ( 'amazon0505' , 96 , 22), - # ( 'artist' , 100 , 12), - # ( 'com-amazon' , 96 , 22), - ( 'soc-BlogCatalog' , 128 , 39), - ( 'amazon0601' , 96 , 22), - - - # ( 'web-BerkStan' , 100 , 12), - # ( 'Reddit' , 602 , 41), - - # ( 'wiki-topcats' , 300 , 12), - # ( 'COLLAB' , 100 , 3) , - # ( 'wiki-topcats' , 300 , 12), - # ( 'Reddit' , 602 , 41), - # ( 'enwiki-2013' , 100 , 12), - # ( 'amazon_also_bought' , 96 , 22), -] - - -data_dir = '/home/yuke/.graphs/orig/' -# print(data_dir) -# print("dataset,origin,origin_eff,reduced,reduced_eff,reduction (%)") - -def find_dense(path, data): - fp = open(path) - nodes = set() - - graph = defaultdict(list) - for 
line in fp: - src, dst = line.strip('\n').split(" ") - src, dst = int(src), int(dst) - nodes.add(src) - nodes.add(dst) - graph[dst].append(src) - num_nodes = max(nodes) - - - # blk_H = math.ceil(num_nodes/dense_tile_H) - # blk_W = math.ceil(num_nodes/dense_tile_W) - - # print(blk_H * blk_W) - # tiles = [0] * (blk_H * blk_W) - - # for src, dst in edges: - # blk_id_H = math.floor(src/dense_tile_H) - # blk_id_W = math.floor(dst/dense_tile_W) - # global_blk_idx = blk_id_H * blk_W + blk_id_W - # tiles[global_blk_idx] += 1 - tile_cnt = 0 - opt_cnt = 0 - chunk_edges = [] - for src_iter in range(0, num_nodes, dense_tile_H): - - dst_list = [] - for src in range(src_iter, src_iter + dense_tile_H): - dst_list += graph[src] - - actual_cnt = len(dst_list) - chunk_edges.append(len(dst_list)) - - range_set = sorted(list(set(dst_list))) - - # TC-GNN tiles - opt_cnt += (len(range_set) + dense_tile_W - 1)//dense_tile_W - tmp_opt_cnt = (len(range_set) + dense_tile_W - 1)//dense_tile_W - exp_opt_cnt = (dense_tile_H * dense_tile_W) * tmp_opt_cnt - - - # naive sliding window without compression. 
- tmp = 0 - range_set = sorted(list(range_set)) - i = j = 0 - while i < len(range_set) and j < len(range_set): - end = range_set[i] + dense_tile_W - while j < len(range_set) and range_set[j] < end: - j += 1 - i = j - tile_cnt += 1 - tmp += 1 - - exp_tile_cnt = (dense_tile_H * dense_tile_W) * tile_cnt - - if tmp < tmp_opt_cnt: - print(range_set) - print(tmp, tmp_opt_cnt) - print("tmp < tmp_opt_cnt Error Encounter, Duplicate Edges") - sys.exit(0) - - # print("{:10},Avg.Chunk.Size: {:.2f}".format(data, np.mean(chunk_edges))) - # print("{},{},{:.2f},{},{:.2f},{:.2f}".format(data, tile_cnt, \ - # actual_cnt/exp_tile_cnt, \ - # opt_cnt, actual_cnt/exp_opt_cnt, \ - # 100 * (tile_cnt - opt_cnt) / tile_cnt)) - - - naive_blockPerRow = math.ceil(tile_cnt/(num_nodes//dense_tile_H)) - tcgnn_blockPerRow = math.ceil(opt_cnt/(num_nodes//dense_tile_H)) - print("{},{},{}".format(data, naive_blockPerRow, tcgnn_blockPerRow)) - - # plt.hist(tiles, bins=100) - # plt.savefig("{}.pdf".format(data)) - # print(Counter(tiles)) - # return tiles -if __name__ == '__main__': - print("Dataset,Naive BPW,TC-GNN BPW") - for data, d, c in dataset: - find_dense(data_dir + data, data) \ No newline at end of file