update with README
Yuke Wang committed May 7, 2023
1 parent 26b03f5 commit 0d40b53
Showing 8 changed files with 255 additions and 170 deletions.
9 changes: 9 additions & 0 deletions .gitmodules
@@ -0,0 +1,9 @@
[submodule "TCGNN-bSpmm"]
path = TCGNN-bSpmm
url = git@github.com:YukeWang96/TCGNN-bSpmm.git
[submodule "TCGNN-tsparse"]
path = TCGNN-tsparse
url = git@github.com:YukeWang96/TCGNN-tsparse.git
[submodule "TCGNN-trition"]
path = TCGNN-trition
url = git@github.com:YukeWang96/TCGNN-trition.git
104 changes: 104 additions & 0 deletions 3_cnt_TC_blk_SDDMM.py
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
from collections import defaultdict
import sys

import numpy as np

dense_tile_H = 16
dense_tile_W = 16

# (name, feature dim, #classes); only the name is used below.
dataset = [
    ('citeseer',        3703, 6),
    ('cora',            1433, 7),
    ('pubmed',           500, 3),
    ('ppi',               50, 121),

    ('PROTEINS_full',     29, 2),
    ('OVCAR-8H',          66, 2),
    ('Yeast',             74, 2),
    ('DD',                89, 2),
    ('YeastH',            75, 2),

    ('amazon0505',        96, 22),
    ('artist',           100, 12),
    ('com-amazon',        96, 22),
    ('soc-BlogCatalog',  128, 39),
    ('amazon0601',        96, 22),
]

data_dir = './tcgnn-ae-graphs/'


def find_dense(path, data, fout):
    # Build an adjacency list from the edge arrays stored in the .npz file.
    graph = defaultdict(list)
    graph_obj = np.load(path + '.npz', allow_pickle=True)
    src_li = graph_obj['src_li']
    dst_li = graph_obj['dst_li']
    num_nodes = int(graph_obj['num_nodes'])  # stored as a scalar array.

    for src, dst in zip(src_li, dst_li):
        graph[dst].append(src)

    tile_cnt = 0  # tiles used by the naive sliding window.
    opt_cnt = 0   # tiles used after TC-GNN column condensing.
    for src_iter in range(0, num_nodes, dense_tile_H):
        # Gather every neighbor column touched by this 16-row window.
        dst_list = []
        for src in range(src_iter, src_iter + dense_tile_H):
            dst_list += graph[src]

        range_set = sorted(set(dst_list))

        # TC-GNN tiles: unique columns packed contiguously into
        # ceil(len / W) tiles of width dense_tile_W.
        tmp_opt_cnt = (len(range_set) + dense_tile_W - 1) // dense_tile_W
        opt_cnt += tmp_opt_cnt

        # Naive sliding window without compression: open a new width-W
        # window at the first column not yet covered.
        tmp = 0
        i = j = 0
        while i < len(range_set) and j < len(range_set):
            end = range_set[i] + dense_tile_W
            while j < len(range_set) and range_set[j] < end:
                j += 1
            i = j
            tile_cnt += 1
            tmp += 1

        # Condensing can never need more tiles than the sliding window;
        # if it does, the input must contain duplicate edges.
        if tmp < tmp_opt_cnt:
            print(range_set)
            print(tmp, tmp_opt_cnt)
            print("Error: tmp < tmp_opt_cnt -- duplicate edges encountered")
            sys.exit(1)

    reduction = 100 * (tile_cnt - opt_cnt) / tile_cnt
    print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, reduction))
    fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, reduction))


if __name__ == '__main__':
    print("dataset,origin,reduced,reduction (%)")
    with open("3_cnt_TC_blk_SDDMM.csv", "w") as fout:
        fout.write("dataset,origin,reduced,reduction (%)\n")
        for data, _, _ in dataset:
            find_dense(data_dir + data, data, fout)
    print("\n\nCheck [3_cnt_TC_blk_SDDMM.csv] for results\n\n")
103 changes: 103 additions & 0 deletions 3_cnt_TC_blk_SpMM.py
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
from collections import defaultdict
import sys

import numpy as np

dense_tile_H = 16
dense_tile_W = 8

# (name, feature dim, #classes); only the name is used below.
dataset = [
    ('citeseer',        3703, 6),
    ('cora',            1433, 7),
    ('pubmed',           500, 3),
    ('ppi',               50, 121),

    ('PROTEINS_full',     29, 2),
    ('OVCAR-8H',          66, 2),
    ('Yeast',             74, 2),
    ('DD',                89, 2),
    ('YeastH',            75, 2),

    ('amazon0505',        96, 22),
    ('artist',           100, 12),
    ('com-amazon',        96, 22),
    ('soc-BlogCatalog',  128, 39),
    ('amazon0601',        96, 22),
]

data_dir = './tcgnn-ae-graphs/'


def find_dense(path, data, fout):
    # Build an adjacency list from the edge arrays stored in the .npz file.
    graph = defaultdict(list)
    graph_obj = np.load(path + '.npz', allow_pickle=True)
    src_li = graph_obj['src_li']
    dst_li = graph_obj['dst_li']
    num_nodes = int(graph_obj['num_nodes'])  # stored as a scalar array.

    for src, dst in zip(src_li, dst_li):
        graph[dst].append(src)

    tile_cnt = 0  # tiles used by the naive sliding window.
    opt_cnt = 0   # tiles used after TC-GNN column condensing.
    for src_iter in range(0, num_nodes, dense_tile_H):
        # Gather every neighbor column touched by this 16-row window.
        dst_list = []
        for src in range(src_iter, src_iter + dense_tile_H):
            dst_list += graph[src]

        range_set = sorted(set(dst_list))

        # TC-GNN tiles: unique columns packed contiguously into
        # ceil(len / W) tiles of width dense_tile_W.
        tmp_opt_cnt = (len(range_set) + dense_tile_W - 1) // dense_tile_W
        opt_cnt += tmp_opt_cnt

        # Naive sliding window without compression: open a new width-W
        # window at the first column not yet covered.
        tmp = 0
        i = j = 0
        while i < len(range_set) and j < len(range_set):
            end = range_set[i] + dense_tile_W
            while j < len(range_set) and range_set[j] < end:
                j += 1
            i = j
            tile_cnt += 1
            tmp += 1

        # Condensing can never need more tiles than the sliding window;
        # if it does, the input must contain duplicate edges.
        if tmp < tmp_opt_cnt:
            print(range_set)
            print(tmp, tmp_opt_cnt)
            print("Error: tmp < tmp_opt_cnt -- duplicate edges encountered")
            sys.exit(1)

    reduction = 100 * (tile_cnt - opt_cnt) / tile_cnt
    print("{},{},{},{:.2f}".format(data, tile_cnt, opt_cnt, reduction))
    fout.write("{},{},{},{:.2f}\n".format(data, tile_cnt, opt_cnt, reduction))


if __name__ == '__main__':
    print("dataset,origin,reduced,reduction (%)")
    with open("3_cnt_TC_blk_SpMM.csv", "w") as fout:
        fout.write("dataset,origin,reduced,reduction (%)\n")
        for data, _, _ in dataset:
            find_dense(data_dir + data, data, fout)
    print("\n\nCheck [3_cnt_TC_blk_SpMM.csv] for results.\n\n")
69 changes: 36 additions & 33 deletions README.md
@@ -81,49 +81,52 @@
wget https://storage.googleapis.com/graph_dataset/tcgnn-ae-graphs.tar.gz
tar -zxvf tcgnn-ae-graphs.tar.gz && rm -rf tcgnn-ae-graphs.tar.gz
```

## Running **DGL** baseline (Fig-6a).
> + Go to the **`dgl_baseline/`** directory.
> + Run `./0_run_dgl.sh` to run all DGL experiments.
> + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`.
<!-- > + Pass the `--model` parameter in `dgl_main.py` with `gcn` and `gin` to profile the example GCN and GIN model, respectively;
> + `./0_bench.py | tee run_dgl.log` to run the script and report the 10-epoch runtime for all evaluated datasets.
> + `./1_log2csv.py` to convert `run_dgl.log` to `run_dgl.csv` for ease of visualization. -->

## Running **PyG** baseline (Fig-6b).
> + Go to the **`pyg_baseline/`** directory.
> + Run `./0_run_pyg.sh` to run all PyG experiments.
> + Check the results in **`1_bench_gcn.csv`** and **`1_bench_agnn.csv`**, which should be similar to the table below.

| dataset          | Avg.Epoch (ms) |
|:-----------------|---------------:|
| citeseer         |         10.149 |
| cora             |          9.964 |
| pubmed           |         10.114 |
| ppi              |         13.419 |
| PROTEINS_full    |         10.908 |
| OVCAR-8H         |         72.636 |
| Yeast            |         66.644 |
| DD               |         18.972 |
| YeastH           |        118.047 |
| amazon0505       |         29.731 |
| artist           |         11.172 |
| com-amazon       |         22.476 |
| soc-BlogCatalog  |         14.971 |
| amazon0601       |         26.621 |

<!-- > + Change `run_GCN=True` or `run_GCN=False` in `0_bench.py` with `gcn` and `gin` to profile the example GCN and GIN model, respectively;
> + `./0_bench.py | tee run_pyg.log` to run the script and report the 10-epoch runtime for all evaluated datasets.
> + `./1_log2csv.py` to convert `run_pyg.log` to `run_pyg.csv` for ease of analysis. -->

## Running **TC-GNN**.
> + Go to the project root directory.
> + Run `./0_run_tcgnn.sh` to run all TC-GNN experiments.
> + Check the results in `1_bench_gcn.csv` and `1_bench_agnn.csv`.
<!-- > + Under the current project directory:
> + `./0_bench.py | tee run_TCGNN.log` to run the script and report the 10-epoch runtime for all evaluated datasets.
> + `./1_log2csv.py` to convert `run_TCGNN.log` to `run_TCGNN.csv` for ease of analysis. -->

## Dense Tile Reduction (Fig-7).
```
python 3_cnt_TC_blk_SDDMM.py
python 3_cnt_TC_blk_SpMM.py
```
+ Check the results in `3_cnt_TC_blk_SDDMM.csv` and `3_cnt_TC_blk_SpMM.csv`. Each row reports the naive sliding-window tile count (`origin`), the TC-GNN tile count after column condensing (`reduced`), and the percentage reduction.
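
The optional helper below is not part of the repository; it is a small sketch, assuming `pandas` is installed, for viewing the SDDMM and SpMM reductions side by side:

```python
import pandas as pd

# Results written by the two counting scripts above.
sddmm = pd.read_csv("3_cnt_TC_blk_SDDMM.csv")
spmm = pd.read_csv("3_cnt_TC_blk_SpMM.csv")

# One row per dataset, with SDDMM and SpMM columns side by side.
merged = sddmm.merge(spmm, on="dataset", suffixes=("_sddmm", "_spmm"))
print(merged.to_string(index=False))
```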


## cuSPARSE-bSpMM Baseline (Fig-6c)
```
cd TCGNN-bSpmm/cusparse
./0_run_bSpMM.sh
```
+ Check the results in `1_run_bSpMM.csv`.


## tSparse Baseline (Table-5, column-2).
```
cd TCGNN-tsparse/
./0_run_tSparse.sh
```
+ Check the results in `1_run_tSparse.csv`.

## Triton Baseline (Table-5, column-3).
```
cd TCGNN-trition/python/bench
./0_run_triton
```
+ Check the results in `1_run_triton.csv`.


## Reference.
+ [**Deep Graph Library**](https://github.com/dmlc/dgl) <br>
1 change: 1 addition & 0 deletions TCGNN-bSpmm
Submodule TCGNN-bSpmm added at 6d72df
1 change: 1 addition & 0 deletions TCGNN-trition
Submodule TCGNN-trition added at cdc9bf
1 change: 1 addition & 0 deletions TCGNN-tsparse
Submodule TCGNN-tsparse added at b0fbee
