This repository has been archived by the owner on Aug 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add int4 dequantzie kernel * Sync ipex * xetla int4 dequantize kernel remove sw barrier --------- Co-authored-by: Ding, Yi1 <yi1.ding@intel.com>
- Loading branch information
1 parent
f7712e0
commit b0efdf4
Showing
14 changed files
with
986 additions
and
99 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2023-2024 Intel Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
/// @file | ||
/// C++ API | ||
|
||
#pragma once | ||
|
||
#include <experimental/kernel/int4_dequantize/config.hpp> | ||
|
||
namespace gpu::xetla::kernel { | ||
|
||
/// @brief Primary template of the int4 dequantize kernel. | ||
/// | ||
/// This declaration has an empty body and exposes no members. | ||
/// NOTE(review): presumably the actual kernels are provided as partial | ||
/// specializations of this template (selected via arch_ / enable) in a | ||
/// separate implementation header -- confirm. | ||
/// | ||
/// @tparam dtype_qweight_ qweight data type. | ||
/// @tparam dtype_scale_ scale data type. | ||
/// @tparam dtype_zp_ zero point data type. | ||
/// @tparam dtype_dequant_weight_ dequant_weight data type. | ||
/// @tparam mem_layout_qweight_ qweight memory layout. | ||
/// @tparam mem_layout_scale_ scale memory layout. | ||
/// @tparam mem_layout_zp_ zero point memory layout. | ||
/// @tparam mem_layout_dequant_weight_ dequant_weight memory layout. | ||
/// @tparam quant_info_ quant_mode, blocksize, qweight_layout info. | ||
/// @tparam int4_dequantize_attr_ parallel-related attribute. | ||
/// @tparam arch_ HW architecture. | ||
/// @tparam enable Defaults to void; not used by this primary template. | ||
/// NOTE(review): looks like a SFINAE hook for specializations -- confirm. | ||
template < | ||
typename dtype_qweight_, | ||
typename dtype_scale_, | ||
typename dtype_zp_, | ||
typename dtype_dequant_weight_, | ||
mem_layout mem_layout_qweight_, | ||
mem_layout mem_layout_scale_, | ||
mem_layout mem_layout_zp_, | ||
mem_layout mem_layout_dequant_weight_, | ||
quant_info quant_info_, | ||
typename int4_dequantize_attr_, | ||
gpu_arch arch_, | ||
typename enable = void> | ||
struct int4_dequantize_t {}; | ||
|
||
} // namespace gpu::xetla::kernel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2023-2024 Intel Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
/// @file | ||
/// C++ API | ||
|
||
#pragma once | ||
|
||
#include <common/common.hpp> | ||
#include <group/group.hpp> | ||
#include <subgroup/subgroup.hpp> | ||
|
||
namespace gpu::xetla::kernel { | ||
|
||
/// @brief Sets up attribute of the int4 dequantize. | ||
/// | ||
/// Each template argument is re-exported as a static constexpr member with | ||
/// the same name minus the trailing underscore. | ||
/// | ||
/// @tparam wg_tile_n_ Is the N-dim of KxN weight processed by one workgroup. | ||
/// @tparam wg_tile_k_ Is the K-dim of KxN weight processed by one workgroup. | ||
/// @tparam sg_tile_n_ Is the N-dim of KxN weight processed by one subgroup. | ||
/// @tparam sg_tile_k_ Is the K-dim of KxN weight processed by one subgroup. | ||
/// @tparam k_stride_ Stride value exposed as k_stride. | ||
/// NOTE(review): the previous comment documented a non-existent | ||
/// load_block_size_ parameter ("size of block when load x dimension") and | ||
/// mentioned kernel spills; presumably k_stride_ is the K-dim step per | ||
/// iteration -- confirm the exact semantics against the implementation. | ||
template < | ||
uint32_t wg_tile_n_, | ||
uint32_t wg_tile_k_, | ||
uint32_t sg_tile_n_, | ||
uint32_t sg_tile_k_, | ||
uint32_t k_stride_> | ||
struct int4_dequantize_attr_t { | ||
// Compile-time mirrors of the template arguments. | ||
static constexpr uint32_t wg_tile_n = wg_tile_n_; | ||
static constexpr uint32_t wg_tile_k = wg_tile_k_; | ||
static constexpr uint32_t sg_tile_n = sg_tile_n_; | ||
static constexpr uint32_t sg_tile_k = sg_tile_k_; | ||
static constexpr uint32_t k_stride = k_stride_; | ||
}; | ||
|
||
} // namespace gpu::xetla::kernel |
24 changes: 24 additions & 0 deletions
24
include/experimental/kernel/int4_dequantize/int4_dequantize.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/******************************************************************************* | ||
* Copyright (c) 2023-2024 Intel Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
/// @file | ||
/// C++ API | ||
|
||
#pragma once | ||
|
||
#include <experimental/kernel/int4_dequantize/api.hpp> | ||
#include <experimental/kernel/int4_dequantize/config.hpp> | ||
#include <experimental/kernel/int4_dequantize/int4_dequantize_xe_impl.hpp> |
Oops, something went wrong.