L0 estimates #105

Closed · wants to merge 5 commits
25 changes: 18 additions & 7 deletions src/qonnx/analysis/inference_cost.py
@@ -201,10 +201,10 @@ def inference_cost_upsample(model, node, discount_sparsity):
return ret


def inference_cost(model, discount_sparsity=True):
def inference_cost(model, discount_sparsity=True, cost_breakdown=False):
"Ensure all nodes have unique names prior to calling this analysis pass."

node_costs = {}
ret, node_costs, nodes_per_optype = {}, {}, {}
zero_cost_ops = [
"MaxPool",
"AveragePool",
@@ -240,13 +240,24 @@ def inference_cost(model, discount_sparsity=True):
if node.op_type in inference_cost_fxn_map.keys():
node_cost = inference_cost_fxn_map[node.op_type](model, node, discount_sparsity)
node_costs[node.name] = node_cost
if node.op_type not in nodes_per_optype.keys():
new_optype = {}
new_optype[node.name] = node_cost
nodes_per_optype[node.op_type] = new_optype
else:
nodes_per_optype[node.op_type][node.name] = node_cost
elif node.op_type in zero_cost_ops:
continue
else:
unsupported_ops.add(node.op_type)

ret = aggregate_dict_keys(node_costs)
ret["unsupported"] = unsupported_ops
ret["discount_sparsity"] = discount_sparsity

total = aggregate_dict_keys(node_costs)
total["unsupported"] = unsupported_ops
total["discount_sparsity"] = discount_sparsity
ret["total_cost"] = total
if cost_breakdown:
optype_cost = {}
for optype, resources in nodes_per_optype.items():
optype_cost[optype] = aggregate_dict_keys(resources)
ret["optype_cost"] = optype_cost
ret["node_cost"] = node_costs
return ret
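
For orientation, a minimal sketch (not part of the diff) of how the reworked pass could be called and the nested structure it now returns; the model path is a placeholder:

    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.analysis.inference_cost import inference_cost

    model = ModelWrapper("model.onnx")  # placeholder path; nodes must have unique names
    ret = inference_cost(model, discount_sparsity=True, cost_breakdown=True)
    ret["total_cost"]   # aggregated costs, plus "unsupported" and "discount_sparsity"
    ret["optype_cost"]  # the same cost keys aggregated per op type
    ret["node_cost"]    # per-node cost dicts keyed by node name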
231 changes: 231 additions & 0 deletions src/qonnx/analysis/l0_resource_estimates.py
@@ -0,0 +1,231 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of qonnx nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from qonnx.core.datatype import DataType

"""DSP Type: a) None:
For Fixed Points and floating point
1) When dsp_type is None. All operations will be processed using LUTs.
2) LUTs are calculated using: 1.1*b_width1*b_width2
2) Example:
a) op_mac_Int4_Int2: 1.1*4*2 = 8.8 LUTs.
b) op_mac_Int8_INT8: 1.1*8*8 = 70.4 LUTs.
c) op_mac_Int8_FLOAT16: 1.1*8*16 = 140.8 LUTs
d) op_mac_FLOAT16_FLOAT16: 1.1*16*16 = 281.6 LUTs.

b) DSP48:
For Fixed Points
1) Everything less than 4 will be promoted to 4. For ex: INT2 will use the same resources as INT4.
2) INT4: One dsp48 + 200 LUTs can accomodate 4 (4*4) bit mac.
So, no of dsp's from mac's can be calculated as (0.25).mac_count + (200*0.5)*mac_count LUTs.
3) Everything between 5 and 8 will be promoted to 8, Ex: INT6 will use the same resources as INT8.
4) INT88: One dsp48 + 200 LUTs can accomodate 2 (8*8) bit mac. So,
no of dsp's from mac's can be calculated as (0.5).mac_count + (200*0.25)*mac_count LUTs.
For Floating Points
1) FLOAT32: 2 dsp + 700 LUT can accomodate 1 mac count.
2) FLOAT16: 1 dsp + 400 LUT can accomodate 1 mac count.
c) DSP58:
For Fixed Points
1) INT8: One dsp58 can accomodate 3 (8*8) bit mac.
So, no of dsp's from mac's can be calculated as (0.33)*mac_count.
2) INT4: One dsp58 can accomodate 4 (4*4) bit mac.
So, no of dsp's from mac's can be calculated as (0.25)*mac_count.
3) INT16: 1 mac count requires 1 dsp.
For Floating Points
1) FLOAT32: 1 mac count requires 1 dsp.
2) FLOAT16: 1 mac count requires 1 dsp.
Mapping strategy for On-Chip Memory (bits_per_res):
a) 1 "BRAM", 1 "BRAM36" and 1 "BRAM_36K" can accomodate 36*1024 = 36864 bits.
b) 1 "BRAM_18K" can accomodate 18*1024 = 18432 bits.
c) 1 "URAM" can accomodate 288*1024 = 294912 bits.
d) 1 LUT can accomodate 64 bits.
"""
resource_table = {
"FLOAT32": {"NONE": (0, 1100), "DSP48": (2, 700), "DSP58": (1, 0)},
"FLOAT16": {"NONE": (0, 1100), "DSP48": (1, 400), "DSP58": (1, 0)},
"INT32": {"NONE": (0, 1100), "DSP48": (1, 0), "DSP58": (1, 0)},
"INT16": {"NONE": (0, 282), "DSP48": (1, 0), "DSP58": (1, 0)},
"INT8": {"NONE": (0, 71), "DSP48": (0.5, 100), "DSP58": (0.33, 0)},
"INT4": {"NONE": (0, 18), "DSP48": (0.25, 50), "DSP58": (0.25, 0)},
}

bits_per_res = {"BRAM": 36864, "BRAM36": 36864, "BRAM_36K": 36864, "BRAM_18K": 18432, "URAM": 294912, "LUT": 64}


def ocm_resources(num_mem_bits, uram_type, bram_type, d_factor):
"""Provides an estimate about the number of urams and brams required for the
on-chip memory depending upon the distribution factor.
Args:
num_mem_bits (int): Number of memory bits.
d_factor (float): Distribution factor between 0 and 1.
To distribute memory between BRAM and URAM.
bram_type (str): can be BRAM, BRAM36, BRAM_36K,BRAM_18K.
Returns:
A dictionary for ocm resources containing memory requirements for luts, brams and urams
"""
if d_factor is None:
luts_req = num_mem_bits / bits_per_res["LUT"] # neither bram nor uram.
ocm_res = {"LUT": luts_req}
elif d_factor == 1: # everything in uram.
uram_req = num_mem_bits / bits_per_res[uram_type] # URAM: 288kbit/URAM
ocm_res = {uram_type: uram_req}
elif d_factor == 0: # everything in bram (BRAM_18K/BRAM/BRAM36/BRAM_36K)
bram_req = num_mem_bits / bits_per_res[bram_type]
ocm_res = {bram_type: bram_req}
else: # both bram and uram.
uram_por, bram_por = d_factor, 1 - d_factor
bram_req = (bram_por * num_mem_bits) / bits_per_res[bram_type]
uram_req = (uram_por * num_mem_bits) / bits_per_res[uram_type]
ocm_res = {bram_type: bram_req, uram_type: uram_req}
return ocm_res
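
# Illustrative sketch (not part of the diff): with an assumed 1,000,000 memory
# bits and d_factor=0.5, half the bits go to BRAM and half to URAM:
#   ocm_resources(1_000_000, uram_type="URAM", bram_type="BRAM_18K", d_factor=0.5)
#   -> {"BRAM_18K": 500000/18432 ~ 27.1, "URAM": 500000/294912 ~ 1.7}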


def promoting_datatype(dtype, b_width):
"""Datatype promoting criterion. Only used when DSPs are used for processing.
Args:
dtype (str): conatining "INT" or "FLOAT".
b_width (int): precision of the respective datatype.
Returns:
Returns promoted datatype and precision value."""

if "INT" in dtype:
promoted_dtype = "INT"
if b_width <= 4:
promoted_bwidth = 4
elif 4 < b_width <= 8:
promoted_bwidth = 8
elif 8 < b_width <= 16:
promoted_bwidth = 16
else:
promoted_bwidth = 32
elif "FLOAT" in dtype:
promoted_dtype = "FLOAT"
if b_width <= 16:
promoted_bwidth = 16
else:
promoted_bwidth = 32
else:
raise Exception("Unsupported data type")

return promoted_dtype, promoted_bwidth
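
# Illustrative sketch (not part of the diff) of the promotion rules above:
#   promoting_datatype("INT2", 2)     -> ("INT", 4)
#   promoting_datatype("INT6", 6)     -> ("INT", 8)
#   promoting_datatype("FLOAT16", 16) -> ("FLOAT", 16)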


def dtype_casting(dtype1, dtype2, b_width1, b_width2):
"""Implementing datatype promotion."""

promoted_dtype1, promoted_bwidth1 = promoting_datatype(dtype1, b_width1) # either INT or FLOAT
promoted_dtype2, promoted_bwidth2 = promoting_datatype(dtype2, b_width2)

if promoted_dtype1 == promoted_dtype2: # same datatype
if promoted_bwidth1 == promoted_bwidth2: # same precision.
            dtype = promoted_dtype1 + str(promoted_bwidth1)  # equivalently promoted_dtype2 + str(promoted_bwidth2)
else: # different precision.
if promoted_bwidth1 >= promoted_bwidth2:
dtype = promoted_dtype1 + str(promoted_bwidth1)
else:
dtype = promoted_dtype2 + str(promoted_bwidth2)
else: # dtype_1 != dtype_2 (Different datatype and same/different precision)
        if promoted_dtype1 == "FLOAT":  # with different datatypes, use the float type and its respective precision.
dtype = promoted_dtype1 + str(promoted_bwidth1)
else:
dtype = promoted_dtype2 + str(promoted_bwidth2)

return dtype
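
# Illustrative sketch (not part of the diff) of the casting rules:
#   dtype_casting("INT4", "INT7", 4, 7)     -> "INT8"    (same kind: wider promoted width wins)
#   dtype_casting("FLOAT16", "INT8", 16, 8) -> "FLOAT16" (mixed kinds: the float operand wins)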


def core_resources(inf_cost, dsp_type, bwidth_lower_limit, bwidth_upper_limit):
"""Provide estimate resources required for the processing ("CORE"), assuming maximum unfolding.
Args:
inf_cost (dict): Inference cost dict.
dsp_type (str): None OR "DSP48" OR "DSP58". Default to None.
bwidth_lower_limit (int): Default to 8. It indicates bit values less than 8 will be processed using LUTs.
bwidth_upper_limit (int): Default to 32. It indicates bit values less than 32 will be processed using LUTs.
Returns:
A dictionary containing CORE resource estimates."""

    dsp_res_mac = 0
    lut_res_mac = 0
    dsp_comp = "DSP" if dsp_type is None else dsp_type  # ensure dsp_comp is defined even if no op_mac keys exist
for i in inf_cost.keys():
if "op_mac" in i:
mac_count = inf_cost[i]
detail_list = i.split("_")
dtype1, dtype2 = detail_list[-1], detail_list[-2]
b_width1, b_width2 = DataType[dtype1].bitwidth(), DataType[dtype2].bitwidth()
if dsp_type is None: # Computing everything in LUTs.
lut_res_mac += 1.1 * b_width1 * b_width2 * mac_count
dsp_comp = "DSP" # default name for DSP and dsp_res_mac = 0
else: # dsp_type == "DSP48" or dsp_type == "DSP58"
if (b_width1 < bwidth_lower_limit or b_width2 < bwidth_lower_limit) or (
b_width1 > bwidth_upper_limit or b_width2 > bwidth_upper_limit
): # Computing everything in LUTs.
lut_res_mac += 1.1 * b_width1 * b_width2 * mac_count # dsp_res_mac = 0
else:
casted_dtype = dtype_casting(dtype1, dtype2, b_width1, b_width2)
casted_bwidth = DataType[casted_dtype].bitwidth()
if casted_bwidth > bwidth_upper_limit: # Computing everything in LUTs.
lut_res_mac += (
1.1 * b_width1 * b_width2 * mac_count
) # original bwidth values are used, since dsp_res_mac = 0.
else:
dsp_res_mac += (
resource_table[casted_dtype][dsp_type][0] * mac_count
) # at index zero, we expect to have dsp factor.
lut_res_mac += (
resource_table[casted_dtype][dsp_type][1] * mac_count
) # at index one, we expect to have lut factor.
dsp_comp = dsp_type # assigning name as per dsp type.
else:
continue

core_res = {"LUT": lut_res_mac, dsp_comp: dsp_res_mac}

return core_res
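
# Illustrative sketch (not part of the diff), assuming one million INT8xINT8
# MACs mapped to DSP58 (0.33 DSPs and 0 extra LUTs per MAC, per resource_table):
#   core_resources({"op_mac_INT8_INT8": 1e6}, "DSP58", 8, 32)
#   -> {"LUT": 0.0, "DSP58": ~330000.0}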


def l0_resource_estimates(
inf_cost, dsp_type=None, uram_type=None, bram_type=None, bwidth_lower_limit=8, bwidth_upper_limit=32, d_factor=None
):
"""Provide estimate resources required for the processing ("CORE") and memory ("OCM"), assuming maximum unfolding.
Args:
inf_cost (dict): Inference cost dict.
dsp_type (str): None OR "DSP48" OR "DSP58". Default to None.
bram_type (str): Default to "BRAM". It can be BRAM, BRAM36, BRAM_36K, BRAM_18K.
bwidth_lower_limit (int): Default to 8. It indicates bit values less than 8 will be processed using LUTs.
bwidth_upper_limit (int): Default to 32. It indicates bit values less than 32 will be processed using LUTs.
d_factor (float): Default to 1. It can have values between 0 and 1.
Returns:
A dictionary containing CORE and OCM resource estimates."""

core_res = core_resources(inf_cost, dsp_type, bwidth_lower_limit, bwidth_upper_limit)

num_mem_bits = inf_cost["total_mem_w_bits"]
ocm_res = ocm_resources(num_mem_bits, uram_type, bram_type, d_factor)

est_res_req = {"CORE": core_res, "OCM": ocm_res}

return est_res_req
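
For reference, a minimal end-to-end sketch (not part of the diff); the cost values below are assumptions, and in practice the dict would come from qonnx.util.inference_cost (next file), which provides "total_mem_w_bits":

    from qonnx.analysis.l0_resource_estimates import l0_resource_estimates

    cost = {"op_mac_INT8_INT8": 1e6, "total_mem_w_bits": 2_000_000}  # hypothetical values
    est = l0_resource_estimates(cost, dsp_type="DSP48", uram_type="URAM", bram_type="BRAM", d_factor=1)
    # est["CORE"] -> {"LUT": 1e8, "DSP48": 500000.0}  (100 LUTs + 0.5 DSP48 per INT8 MAC)
    # est["OCM"]  -> {"URAM": 2000000/294912 ~ 6.8}   (d_factor=1 places all weights in URAM)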
77 changes: 50 additions & 27 deletions src/qonnx/util/inference_cost.py
@@ -44,7 +44,6 @@
from qonnx.transformation.infer_datatypes import InferDataTypes
from qonnx.transformation.infer_shapes import InferShapes


def compute_bops_and_macs(inf_cost_dict):
total_bops = 0.0
total_macs = 0.0
@@ -57,7 +56,6 @@ def compute_bops_and_macs(inf_cost_dict):
total_macs += v
return total_bops, total_macs


def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
total_mem_bits = 0.0
total_mem_elems = 0.0
@@ -69,9 +67,23 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
total_mem_elems += v
return total_mem_bits, total_mem_elems

def assign_mem_bits_and_elems(res_dict):
mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res_dict, "mem_w")
mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res_dict, "mem_o")
res_dict["total_mem_w_bits"] = mem_w_bits
res_dict["total_mem_w_elems"] = mem_w_elems
res_dict["total_mem_o_bits"] = mem_o_bits
res_dict["total_mem_o_elems"] = mem_o_elems
return res_dict

def inference_cost(
model_filename_or_wrapper, *, output_json=None, output_onnx=None, preprocess=True, discount_sparsity=True
model_filename_or_wrapper,
*,
output_json=None,
output_onnx=None,
preprocess=True,
discount_sparsity=True,
cost_breakdown=False
):
"""Return the inference cost estimate metric for given ONNX model.
Supports the Quant op for weight/activation quantization.
@@ -83,8 +95,9 @@ def inference_cost(
:param preprocess: If set, run preprocessing steps such as shape inference,
datatype inference and constant folding. Strongly recommended.
:param discount_sparsity: If set, will discount op cost of MAC ops with a
constant zero weight, and the mem cost of constant zero weights.
"""
        constant zero weight, and the mem cost of constant zero weights.
    :param cost_breakdown: If set, include per-node and per-op-type cost
        breakdowns in the returned dict in addition to the total."""

combined_results = {}
if isinstance(model_filename_or_wrapper, ModelWrapper):
model = model_filename_or_wrapper
else:
@@ -104,30 +117,40 @@
model = model.transform(GiveReadableTensorNames())
if output_onnx is not None:
model.save(output_onnx)
ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
bops, macs = compute_bops_and_macs(ret)
mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(ret, "mem_w")
mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(ret, "mem_o")
ret["total_bops"] = bops
ret["total_macs"] = macs
ret["total_mem_w_bits"] = mem_w_bits
ret["total_mem_w_elems"] = mem_w_elems
ret["total_mem_o_bits"] = mem_o_bits
ret["total_mem_o_elems"] = mem_o_elems

if "unsupported" in ret:
ret["unsupported"] = str(ret["unsupported"])

if output_json is not None:
with open(output_json, "w") as f:
json.dump(ret, f, sort_keys=True, indent=2)

return ret


ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity,
cost_breakdown))
for i, res in ret.items():
if i == "total_cost":
bops, macs = compute_bops_and_macs(res)
res = assign_mem_bits_and_elems(res)
res["total_bops"] = bops
res["total_macs"] = macs
if "unsupported" in res:
res["unsupported"] = str(res["unsupported"])
if output_json is not None:
with open(output_json, "w") as f:
json.dump(res, f, sort_keys=True, indent=2)
combined_results[i] = res
elif i == "optype_cost":
per_optype_breakdown = {}
for optype, op_res in res.items():
bops, macs = compute_bops_and_macs(op_res)
op_res = assign_mem_bits_and_elems(op_res)
op_res["total_bops"] = bops
op_res["total_macs"] = macs
per_optype_breakdown[optype] = op_res
combined_results[i] = per_optype_breakdown
else:
per_node_breakdown = {}
for node_name in res.keys():
node_res = res[node_name]
node_res = assign_mem_bits_and_elems(node_res)
per_node_breakdown[node_name] = node_res
combined_results[i] = per_node_breakdown
return combined_results
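
# Illustrative sketch (not part of the diff) of the expected top-level result,
# with a placeholder model path:
#   res = inference_cost("model.onnx", cost_breakdown=True)
#   res["total_cost"]  -> totals incl. total_bops/total_macs and mem bit/elem counts
#   res["optype_cost"] -> the same totals aggregated per op type
#   res["node_cost"]   -> per-node costs with per-node memory totals assigned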

def main():
clize.run(inference_cost)


if __name__ == "__main__":
main()