-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Harish
committed
Feb 22, 2024
1 parent
39442cb
commit 656b946
Showing
4 changed files
with
556 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
# Copyright (c) 2024 Advanced Micro Devices, Inc. | ||
# # All rights reserved. | ||
# # | ||
# # Redistribution and use in source and binary forms, with or without | ||
# # modification, are permitted provided that the following conditions are met: | ||
# # | ||
# # * Redistributions of source code must retain the above copyright notice, this | ||
# # list of conditions and the following disclaimer. | ||
# # | ||
# # * Redistributions in binary form must reproduce the above copyright notice, | ||
# # this list of conditions and the following disclaimer in the documentation | ||
# # and/or other materials provided with the distribution. | ||
# # | ||
# # * Neither the name of qonnx nor the names of its | ||
# # contributors may be used to endorse or promote products derived from | ||
# # this software without specific prior written permission. | ||
# # | ||
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
# # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
# # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
# # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
# # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
# # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
# # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
# # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
from qonnx.core.datatype import DataType | ||
|
||
"""DSP Type: a) None: | ||
For Fixed Points and floating point | ||
1) When dsp_type is None. All operations will be processed using LUTs. | ||
2) LUTs are calculated using: 1.1*b_width1*b_width2 | ||
3) Example:
a) op_mac_Int4_Int2: 1.1*4*2 = 8.8 LUTs. | ||
b) op_mac_Int8_INT8: 1.1*8*8 = 70.4 LUTs. | ||
c) op_mac_Int8_FLOAT16: 1.1*8*16 = 140.8 LUTs | ||
d) op_mac_FLOAT16_FLOAT16: 1.1*16*16 = 281.6 LUTs. | ||
b) DSP48: | ||
For Fixed Points | ||
1) Everything less than 4 will be promoted to 4. For ex: INT2 will use the same resources as INT4.
2) INT4: One dsp48 + 200 LUTs can accommodate 4 (4*4) bit macs.
So, the resources for the macs can be calculated as (0.25)*mac_count DSPs + (200*0.25)*mac_count LUTs.
3) Everything between 5 and 8 will be promoted to 8. Ex: INT6 will use the same resources as INT8.
4) INT8: One dsp48 + 200 LUTs can accommodate 2 (8*8) bit macs.
So, the resources for the macs can be calculated as (0.5)*mac_count DSPs + (200*0.5)*mac_count LUTs.
For Floating Points | ||
1) FLOAT32: 2 dsp + 700 LUT can accommodate 1 mac count.
2) FLOAT16: 1 dsp + 400 LUT can accommodate 1 mac count.
c) DSP58: | ||
For Fixed Points | ||
1) INT8: One dsp58 can accommodate 3 (8*8) bit macs.
So, the number of DSPs for the macs can be calculated as (0.33)*mac_count.
2) INT4: One dsp58 can accommodate 4 (4*4) bit macs.
So, the number of DSPs for the macs can be calculated as (0.25)*mac_count.
3) INT16: 1 mac count requires 1 dsp. | ||
For Floating Points | ||
1) FLOAT32: 1 mac count requires 1 dsp. | ||
2) FLOAT16: 1 mac count requires 1 dsp. | ||
""" | ||
# Per-MAC cost table: resource_table[<promoted dtype>][<dsp type>] -> (dsp_factor, lut_factor).
# Index 0 is the DSP factor and index 1 is the LUT factor; both are multiplied by the MAC count.
resource_table = {
    dtype: {"NONE": none_cost, "DSP48": dsp48_cost, "DSP58": dsp58_cost}
    for dtype, none_cost, dsp48_cost, dsp58_cost in (
        # dtype      NONE        DSP48        DSP58
        ("FLOAT32", (0, 1100), (2, 700), (1, 0)),
        ("FLOAT16", (0, 1100), (1, 400), (1, 0)),
        ("INT32", (0, 1100), (1, 0), (1, 0)),
        ("INT16", (0, 282), (1, 0), (1, 0)),
        ("INT8", (0, 71), (0.5, 100), (0.33, 0)),
        ("INT4", (0, 18), (0.25, 50), (0.25, 0)),
    )
}
|
||
|
||
def resource_distribution(num_mem_bits, bram_type, d_fator):
    """Split the on-chip memory requirement between BRAM and URAM.

    Args:
        num_mem_bits (int): Number of memory bits to place.
        bram_type (str): Name of the BRAM primitive. "BRAM", "BRAM36" and "BRAM_36K"
            are counted as 36 Kbit blocks; any other value is counted as an 18 Kbit block.
        d_fator (float): Distribution factor. 1 places everything in URAM, 0 places
            everything in BRAM, any other value sends that fraction to URAM and the
            remainder to BRAM.

    Returns:
        dict: Maps the memory type name (``bram_type`` and/or "URAM") to the fractional
        number of blocks required. No rounding is applied.
    """
    uram_block_bits = 288 * 1024  # URAM: 288 Kbit per block

    # Everything in URAM: the BRAM type is irrelevant.
    if d_fator == 1:
        return {"URAM": num_mem_bits / uram_block_bits}

    # BRAM: 36 Kbit per block, BRAM_18K (and anything else): 18 Kbit per block.
    bram_block_kbits = 36 if bram_type in ("BRAM", "BRAM36", "BRAM_36K") else 18
    bram_block_bits = bram_block_kbits * 1024

    # Everything in BRAM.
    if d_fator == 0:
        return {bram_type: num_mem_bits / bram_block_bits}

    # Mixed placement: d_fator of the bits go to URAM, the rest to BRAM.
    bram_bits = (1 - d_fator) * num_mem_bits
    uram_bits = d_fator * num_mem_bits
    return {bram_type: bram_bits / bram_block_bits, "URAM": uram_bits / uram_block_bits}
|
||
|
||
def promoting_datatype(dtype, b_width):
    """Promote a datatype to the next precision supported by the DSP resource table.

    Only used when DSPs are used for processing.

    Args:
        dtype (str): Datatype name containing "INT" or "FLOAT". "INT" is checked
            first, so any name containing "INT" is treated as an integer type.
        b_width (int): Precision (bit width) of the datatype.

    Returns:
        tuple: ``(promoted_dtype, promoted_bwidth)`` where ``promoted_dtype`` is "INT"
        or "FLOAT". Integer widths are promoted to 4, 8, 16 or 32; floating-point
        widths are promoted to 16 or 32.

    Raises:
        ValueError: If ``dtype`` contains neither "INT" nor "FLOAT".
    """
    if "INT" in dtype:
        # Smallest supported integer width that can hold b_width; anything above 16 uses 32.
        for supported_width in (4, 8, 16):
            if b_width <= supported_width:
                return "INT", supported_width
        return "INT", 32

    if "FLOAT" in dtype:
        return "FLOAT", (16 if b_width <= 16 else 32)

    # ValueError is a subclass of Exception, so existing `except Exception` callers still work.
    raise ValueError(f"Unsupported data type: {dtype!r}")
|
||
|
||
def dtype_casting(dtype1, dtype2, b_width1, b_width2):
    """Return the common promoted datatype name for a pair of MAC operands.

    Both operands are first promoted with ``promoting_datatype``. Then:
      * same kind (INT/INT or FLOAT/FLOAT): the larger promoted precision is used;
      * mixed kinds: the FLOAT operand is used together with its own promoted precision.

    Args:
        dtype1 (str): Datatype name of the first operand.
        dtype2 (str): Datatype name of the second operand.
        b_width1 (int): Precision of the first operand.
        b_width2 (int): Precision of the second operand.

    Returns:
        str: Promoted kind followed by the precision, e.g. "INT8" or "FLOAT16".
    """
    kind1, width1 = promoting_datatype(dtype1, b_width1)  # kind is either "INT" or "FLOAT"
    kind2, width2 = promoting_datatype(dtype2, b_width2)

    if kind1 != kind2:
        # Mixed INT/FLOAT: the float side decides, using its own precision.
        if kind1 == "FLOAT":
            return kind1 + str(width1)
        return kind2 + str(width2)

    # Same kind on both sides: keep the wider of the two precisions.
    wider = width1 if width1 >= width2 else width2
    return kind1 + str(wider)
|
||
|
||
def l0_resource_estimates(inf_cost, dsp_type=None, bram_type="BRAM", bwidth_lower_limit=8, bwidth_upper_limit=32, d_fator=1):
    """Estimate the processing ("CORE") and memory ("OCM") resources, assuming maximum unfolding.

    The per-MAC DSP and LUT factors are read from the module-level ``resource_table``.

    Args:
        inf_cost (dict): Inference cost dict. Keys containing "op_mac" must end in
            "_<dtype>_<dtype>" (e.g. "op_mac_INT8_INT8") and map to a MAC count.
            The key "total_mem_w_bits" maps to the number of memory bits.
            All other keys are ignored.
        dsp_type (str): None OR "DSP48" OR "DSP58" (case-insensitive). Default to None,
            in which case every MAC is costed in LUTs.
        bram_type (str): Default to "BRAM". It can be BRAM, BRAM36, BRAM_36K, BRAM_18K
            (case-insensitive).
        bwidth_lower_limit (int): Default to 8. MACs with an operand narrower than this
            are processed using LUTs.
        bwidth_upper_limit (int): Default to 32. MACs with an operand (or promoted
            datatype) wider than this are processed using LUTs.
        d_fator (float): Default to 1. Distribution factor between 0 and 1, forwarded
            to ``resource_distribution``.

    Returns:
        dict: ``{"CORE": {"LUT": <luts>, <dsp name>: <dsps>}, "OCM": <memory dict>}``.
        The DSP entry is named "DSP" when ``dsp_type`` is None, otherwise it carries the
        upper-cased ``dsp_type``. A missing "total_mem_w_bits" entry is treated as
        zero memory bits.
    """
    bram_type = bram_type.upper()
    if dsp_type is not None:
        dsp_type = dsp_type.upper()

    # Decide the DSP entry name up front so it exists even when inf_cost has no MAC entries.
    dsp_comp = "DSP" if dsp_type is None else dsp_type
    dsp_res_mac = 0
    lut_res_mac = 0

    for cost_key, mac_count in inf_cost.items():
        if "op_mac" not in cost_key:
            continue

        detail_list = cost_key.split("_")
        dtype1, dtype2 = detail_list[-1], detail_list[-2]
        b_width1, b_width2 = DataType[dtype1].bitwidth(), DataType[dtype2].bitwidth()

        # LUT-only when no DSP is selected or an operand falls outside the DSP bit-width window.
        lut_only = (
            dsp_type is None
            or b_width1 < bwidth_lower_limit
            or b_width2 < bwidth_lower_limit
            or b_width1 > bwidth_upper_limit
            or b_width2 > bwidth_upper_limit
        )
        if not lut_only:
            casted_dtype = dtype_casting(dtype1, dtype2, b_width1, b_width2)
            # The promoted datatype may itself exceed the upper limit.
            lut_only = DataType[casted_dtype].bitwidth() > bwidth_upper_limit

        if lut_only:
            # Original (unpromoted) bit widths are used for the LUT-only cost.
            lut_res_mac += 1.1 * b_width1 * b_width2 * mac_count
        else:
            # Index 0 holds the DSP factor, index 1 the LUT factor.
            dsp_factor, lut_factor = resource_table[casted_dtype][dsp_type]
            dsp_res_mac += dsp_factor * mac_count
            lut_res_mac += lut_factor * mac_count

    # Missing memory entry -> zero bits, so the "OCM" part is always present.
    num_mem_bits = inf_cost.get("total_mem_w_bits", 0)
    ocm_res = resource_distribution(num_mem_bits, bram_type, d_fator)

    return {"CORE": {"LUT": lut_res_mac, dsp_comp: dsp_res_mac}, "OCM": ocm_res}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# Copyright (c) 2024 Advanced Micro Devices, Inc. | ||
# All rights reserved. | ||
# | ||
# Redistribution and use in source and binary forms, with or without | ||
# modification, are permitted provided that the following conditions are met: | ||
# | ||
# * Redistributions of source code must retain the above copyright notice, this | ||
# list of conditions and the following disclaimer. | ||
# | ||
# * Redistributions in binary form must reproduce the above copyright notice, | ||
# this list of conditions and the following disclaimer in the documentation | ||
# and/or other materials provided with the distribution. | ||
# | ||
# * Neither the name of qonnx nor the names of its | ||
# contributors may be used to endorse or promote products derived from | ||
# this software without specific prior written permission. | ||
# | ||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
|
||
from qonnx.analysis.l0_resource_estimates import l0_resource_estimates | ||
|
||
""" Calculate the estimate amount of resources required for a model (from inference cost dict). | ||
The estimates will be divided into two parts: | ||
1) CORE: For processing | ||
2) OCM: On-Chip Memory | ||
First, a memory check is performed to verify enough memory is available to accommodate the model on the FPGA.
Then, for the resources required for processing (CORE), inference per second is calculated. | ||
Args: | ||
resource_budget (dict): Representing the resources available in a respective FPGA. | ||
inf_cost (dict): Inference cost dict. | ||
l0_resource_estimates(): dsp_type (str), bram_type (str), bwidth_lower_limit (int),
bwidth_upper_limit (int), d_fator (float)
clock_freq: Default 3 MHz (3000000).
Returns:
A tuple (resource name, inferences per second) for the limiting CORE resource, or the
string 'Memory out of budget' when the memory check fails.
Examples: | ||
1) est_res_req: {'CORE': {'LUT': 1198735769600.0, 'DSP48': 3450357760.0}, | ||
'OCM': {'BRAM_18K': 8798, 'URAM': 672}} | ||
2) resource_budget: {'LUT': 4397752190000, 'BRAM_18K': 1182, 'URAM': 0, 'DSP48': 500000} | ||
""" | ||
# resource_map["res_limit"]: fraction of each budgeted resource that may be used.
# resource_map["enc_lut"]: LUTs charged per memory block of the given type when that
# memory has to be placed in LUTs instead.
resource_map = {
    "res_limit": {
        "LUT": 0.7,
        **dict.fromkeys(
            ("BRAM", "BRAM36", "BRAM_36K", "BRAM_18K", "URAM", "DSP48", "DSP58"),
            0.80,
        ),
    },
    "enc_lut": {
        "BRAM": 576,
        "BRAM36": 576,
        "BRAM_36K": 576,
        "BRAM_18K": 288,
        "URAM": 4608,
    },
}
|
||
|
||
def l0_performance_estimate(
    resource_budget,
    inf_cost,
    dsp_type=None,
    bram_type="BRAM",
    bwidth_lower_limit=8,
    bwidth_upper_limit=32,
    d_fator=1,
    clock_freq=3000000,
):
    """Estimate the achievable inferences per second for a model on a given resource budget.

    The resource requirements come from ``l0_resource_estimates``. First the memory
    ("OCM") requirement is checked against the budget; memory that does not fit (or
    whose type is absent from the budget) is moved to the LUTs reserved for memory.
    If memory fits, the inferences per second are computed for every processing
    ("CORE") resource and the lowest one is reported.

    Args:
        resource_budget (dict): Available amount per resource name. Must contain "LUT"
            and every CORE resource that has a non-zero requirement.
        inf_cost (dict): Inference cost dict, forwarded to ``l0_resource_estimates``.
        dsp_type (str): Forwarded to ``l0_resource_estimates``. Default to None.
        bram_type (str): Forwarded to ``l0_resource_estimates``. Default to "BRAM".
        bwidth_lower_limit (int): Forwarded to ``l0_resource_estimates``. Default to 8.
        bwidth_upper_limit (int): Forwarded to ``l0_resource_estimates``. Default to 32.
        d_fator (float): Forwarded to ``l0_resource_estimates``. Default to 1.
        clock_freq (int): Clock frequency used to scale the result. Default to 3000000.

    Returns:
        tuple or str: ``(resource_name, inferences_per_second)`` for the CORE resource
        with the lowest inferences per second, or the string "Memory out of budget"
        when the memory check fails.

    Raises:
        ValueError: If memory fits but no CORE resource has a non-zero requirement,
            so no limiting resource exists.
    """
    res_limit, enc_lut = resource_map["res_limit"], resource_map["enc_lut"]
    est_res_req = l0_resource_estimates(inf_cost, dsp_type, bram_type, bwidth_lower_limit, bwidth_upper_limit, d_fator)
    ocm_res_req, core_res_req = est_res_req["OCM"], est_res_req["CORE"]
    luts_for_mem = (1 - res_limit["LUT"]) * resource_budget["LUT"]  # some amount of LUTs for memory requirement.

    # An empty memory requirement trivially fits.
    memory_check = True
    for mem_type, res in ocm_res_req.items():
        if mem_type in resource_budget:
            resource_tally = res_limit[mem_type] * resource_budget[mem_type] - res
            if resource_tally >= 0:  # do param fit on ocm.
                continue
            # Shortfall: move the missing blocks to LUTs.
            luts_req = enc_lut[mem_type] * abs(resource_tally)
            # NOTE(review): this branch scales only luts_for_mem by the LUT limit, while the
            # branch below scales (luts_for_mem - luts_req); confirm which one is intended.
            resource_tally = res_limit["LUT"] * luts_for_mem - luts_req
            if resource_tally < 0:
                memory_check = False
                break
            print(f"{mem_type} out of budget, using luts")
            luts_for_mem = luts_for_mem - luts_req
        else:
            # Memory type not in the budget at all: place everything in LUTs.
            luts_req = enc_lut[mem_type] * res
            resource_tally = res_limit["LUT"] * (luts_for_mem - luts_req)
            if resource_tally < 0:
                memory_check = False
                break
            print(f"{mem_type} not available in the budget, using luts")
            luts_for_mem = luts_for_mem - luts_req

    if not memory_check:
        return "Memory out of budget"

    expected_inference = {}
    for core_type, required in core_res_req.items():
        # A resource that is not needed cannot be the bottleneck. Skipping it also avoids
        # dividing by zero and looking up the placeholder "DSP" entry in the limit tables.
        if required == 0:
            continue
        expected_inference[core_type] = ((res_limit[core_type] * resource_budget[core_type]) / required) * clock_freq

    if not expected_inference:
        raise ValueError("No CORE resource is required; inferences per second cannot be estimated")

    min_infc_res = min(expected_inference, key=expected_inference.get)
    return (min_infc_res, expected_inference[min_infc_res])
Oops, something went wrong.