Skip to content

Commit

Permalink
Add copyRegions to kp::OpTensorSyncDevice
Browse files Browse the repository at this point in the history
Signed-off-by: crydsch <crydsch@lph.zone>
  • Loading branch information
Crydsch committed Aug 17, 2023
1 parent a8feda4 commit 65c0cbb
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 5 deletions.
11 changes: 9 additions & 2 deletions src/OpTensorSyncDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
namespace kp {

OpTensorSyncDevice::OpTensorSyncDevice(
const std::vector<std::shared_ptr<Tensor>>& tensors)
const std::vector<std::shared_ptr<Tensor>>& tensors,
const std::vector<vk::BufferCopy>& copyRegions)
{
KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params");

Expand All @@ -15,13 +16,15 @@ OpTensorSyncDevice::OpTensorSyncDevice(
}

this->mTensors = tensors;
this->mCopyRegions = copyRegions;
}

// Destructor: logs that teardown has begun, then empties the tensor list and
// the copy-region list held by this operation.
OpTensorSyncDevice::~OpTensorSyncDevice()
{
    KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started");

    // Drop the shared_ptr references first, then the plain region records.
    mTensors.clear();
    mCopyRegions.clear();
}

void
Expand All @@ -31,7 +34,11 @@ OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer)

for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer);
if (i < this->mCopyRegions.size()) {
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer, this->mCopyRegions[i]);
} else {
this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer);
}
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions src/Tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,14 @@ Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer)
vk::DeviceSize bufferSize(this->memorySize());
vk::BufferCopy copyRegion(0, 0, bufferSize);

this->recordCopyFromStagingToDevice(commandBuffer, copyRegion);
}

void
Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer, const vk::BufferCopy copyRegion)
{
vk::DeviceSize bufferSize(this->memorySize());

KP_LOG_DEBUG("Kompute Tensor copying data size {}.", bufferSize);

this->recordCopyBuffer(commandBuffer,
Expand Down
15 changes: 15 additions & 0 deletions src/include/kompute/Sequence.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,21 @@ class Sequence : public std::enable_shared_from_this<Sequence>
std::shared_ptr<T> op{ new T(tensors, std::forward<TArgs>(params)...) };
return this->eval(op);
}
/**
 * Constructs an operation of type T from the provided tensors and copy
 * regions and hands it to this->eval() for evaluation.
 *
 * T must be constructible from (tensors, copyRegions).
 *
 * @param tensors Vector of tensors to use for the operation
 * @param copyRegions Vector of buffer regions to copy (one per tensor)
 * @return shared_ptr<Sequence> of the Sequence class itself
 */
template<typename T, typename... TArgs>
std::shared_ptr<Sequence> eval(std::vector<std::shared_ptr<Tensor>> tensors,
                               std::vector<vk::BufferCopy> copyRegions)
{
    auto operation = std::shared_ptr<T>(new T(tensors, copyRegions));
    return this->eval(operation);
}
/**
* Eval sends all the recorded and stored operations in the vector of
* operations into the gpu as a submit job with a barrier.
Expand Down
12 changes: 11 additions & 1 deletion src/include/kompute/Tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,22 @@ class Tensor
/**
* Records a copy from the internal staging memory to the device memory
* using an optional barrier to wait for the operation. This function would
* only be relevant for kp::Tensors of type eDevice.
* only be relevant for kp::Tensors of type eDevice. Copies the entire tensor.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
*/
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer);

/**
 * Records a copy from the internal staging memory to the device memory
 * using an optional barrier to wait for the operation. This function would
 * only be relevant for kp::Tensors of type eDevice. This overload takes an
 * explicit region so that only part of the tensor's memory is transferred.
 *
 * @param commandBuffer Vulkan Command Buffer to record the commands into
 * @param copyRegion The buffer region to copy; its source offset,
 * destination offset and size are expressed in bytes, not in elements.
 * NOTE(review): no bounds check of the region against the tensor's memory
 * size is visible from here - confirm how out-of-range regions are handled.
 */
void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer, const vk::BufferCopy copyRegion);

/**
* Records a copy from the internal device memory to the staging memory
* using an optional barrier to wait for the operation. This function would
Expand Down
7 changes: 5 additions & 2 deletions src/include/kompute/operations/OpTensorSyncDevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@ class OpTensorSyncDevice : public OpBase
public:
/**
* Default constructor with parameters that provides the core vulkan
* resources and the tensors that will be used in the operation. The tensos
* resources and the tensors that will be used in the operation. The tensors
* provided cannot be of type TensorTypes::eStorage.
* Optionally for each tensor a buffer region to copy can be specified.
*
* @param tensors Tensors that will be used in the operation.
* @param copyRegions Optional buffer regions to copy, matched to tensors by
* index; a tensor without a matching region is copied in full.
*/
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors);
OpTensorSyncDevice(const std::vector<std::shared_ptr<Tensor>>& tensors, const std::vector<vk::BufferCopy>& copyRegions = {});

/**
* Default destructor. This class does not manage memory so it won't be
Expand Down Expand Up @@ -58,6 +60,7 @@ class OpTensorSyncDevice : public OpBase
private:
// -------------- ALWAYS OWNED RESOURCES
std::vector<std::shared_ptr<Tensor>> mTensors;
std::vector<vk::BufferCopy> mCopyRegions;
};

} // End namespace kp
47 changes: 47 additions & 0 deletions test/TestOpTensorSync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,50 @@ TEST(TestOpTensorSync, SyncToDeviceMemoryMultiTensor)
EXPECT_EQ(tensorB->vector(), testVec);
EXPECT_EQ(tensorC->vector(), testVec);
}

// Checks that kp::OpTensorSyncDevice honours a per-tensor copy region:
// tensorA is synced with an explicit one-float region, while tensorC is
// passed without a region. The device contents are then copied to tensorB
// and read back to host memory for comparison.
TEST(TestOpTensorSync, SyncToDeviceMemoryCopyRegion)
{

kp::Manager mgr;

// Host-side data written into tensorA before the partial sync.
std::vector<float> testVecPreA{ 1, 2, 3, 4 };
// Expected contents of tensorA (and tensorB) after the round trip.
std::vector<float> testVecPostA{ 0, 1, 0, 0 };
// NOTE(review): testVecPostB is not referenced below; presumably reserved
// for the partial OpTensorCopy / OpTensorSyncLocal TODOs - confirm.
std::vector<float> testVecPostB{ 0, 0, 0, 1 };
// Data for tensorC, which is synced without a copy region.
std::vector<float> testVecC{ 5, 6, 7, 8 };

std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor({ 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0, 0 });
std::shared_ptr<kp::TensorT<float>> tensorC = mgr.tensor({ 0, 0, 0, 0 });

EXPECT_TRUE(tensorA->isInit());
EXPECT_TRUE(tensorB->isInit());
EXPECT_TRUE(tensorC->isInit());

// Overwrite the host-side data of tensorA and tensorC; tensorB keeps its
// initial zeros.
tensorA->setData(testVecPreA);
tensorC->setData(testVecC);

// TODO
// should use custom kp::copyRegion struct with index/number of elements instead of bytes
// how to handle out of bounds access?
// add copyRegion support to kp::OpTensorCopy
// add copyRegion support to kp::OpTensorSyncLocal
// add template specialization to sequence->record()

// Equivalent, more verbose way of building the region used below (kept for
// reference):
// vk::BufferCopy copyRegion;
// copyRegion.srcOffset = 0; // in bytes
// copyRegion.dstOffset = 1 * tensorA->dataTypeMemorySize(); // in bytes
// copyRegion.size = 1 * tensorA->dataTypeMemorySize(); // in bytes
// mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA, tensorC }, { copyRegion });

// The region below is expressed in bytes using sizeof(float), so first make
// sure that matches the tensor's element size.
EXPECT_EQ(sizeof(float), tensorA->dataTypeMemorySize());
// Single region { srcOffset = 0, dstOffset = 1 float, size = 1 float }: it
// applies to tensorA (index 0) only; tensorC (index 1) has no region.
mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA, tensorC }, { { 0, 1 * sizeof(float), 1 * sizeof(float) } });
// tensorA on the device now looks like: [ 0, 1, 0, 0 ]
// (assumes the device buffer started out as all zeros - TODO confirm)

mgr.sequence()->eval<kp::OpTensorCopy>({ tensorA, tensorB }); // TODO copy only tensorA index 1 to tensorB index 2

mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorA, tensorB, tensorC }); // TODO copy tensorB index 2 to tensorB index 3

// tensorA and tensorB should both hold the partially synced data; tensorC
// should hold its full data.
EXPECT_EQ(tensorA->vector(), testVecPostA);
EXPECT_EQ(tensorB->vector(), testVecPostA);
EXPECT_EQ(tensorC->vector(), testVecC);
}

0 comments on commit 65c0cbb

Please sign in to comment.