diff --git a/scripts/fully_qualify_namespaces.sh b/scripts/fully_qualify_namespaces.sh new file mode 100755 index 00000000..1db962d7 --- /dev/null +++ b/scripts/fully_qualify_namespaces.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Run this from the src directory + +filepath="/path/to/file.txt" +filename=$(basename "$filepath") + +# Get the class names from each .hpp file. +# Assumes that the name of the header file is also the class name +# and that the class name is a top-level class that belongs in the +# kp namespace. +for file in $(find . -name "*.hpp"); do + class_name=$(basename "$file" .hpp) + echo "$class_name" + + # Replace anything where the class name is used as a template argument + find . \( -name "*.cpp" -o -name "*.hpp" \) -exec sed -i s/\<"$class_name"/\mPipeline && this->mPipelineCache && this->mPipelineLayout && this->mDescriptorPool && this->mDescriptorSet && @@ -21,7 +21,7 @@ Algorithm::isInit() } void -Algorithm::destroy() +kp::Algorithm::destroy() { // We don't have to free memory on destroy as it's freed by the // commandBuffer destructor if (this->mPushConstantsData) { @@ -125,7 +125,7 @@ Algorithm::destroy() } void -Algorithm::createParameters() +kp::Algorithm::createParameters() { KP_LOG_DEBUG("Kompute Algorithm createParameters started"); @@ -204,7 +204,7 @@ Algorithm::createParameters() } void -Algorithm::createShaderModule() +kp::Algorithm::createShaderModule() { KP_LOG_DEBUG("Kompute Algorithm createShaderModule started"); @@ -225,7 +225,7 @@ Algorithm::createShaderModule() } void -Algorithm::createPipeline() +kp::Algorithm::createPipeline() { KP_LOG_DEBUG("Kompute Algorithm calling create Pipeline"); @@ -321,7 +321,7 @@ Algorithm::createPipeline() } void -Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer) +kp::Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute Algorithm binding pipeline"); @@ -339,7 +339,7 @@ Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer) } void -Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) +kp::Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) { if (this->mPushConstantsSize) { KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}", @@ -356,7 +356,7 @@ Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer) } void -Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer) +kp::Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute Algorithm recording dispatch"); @@ -365,7 +365,7 @@ Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer) } void -Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) +kp::Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) { KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size"); @@ -389,13 +389,13 @@ Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize) } const Workgroup& -Algorithm::getWorkgroup() +kp::Algorithm::getWorkgroup() { return this->mWorkgroup; } -const std::vector>& -Algorithm::getTensors() +const std::vector>& +kp::Algorithm::getTensors() { return this->mTensors; } diff --git a/src/Manager.cpp b/src/Manager.cpp index 301b4a6f..e061942c 100644 --- a/src/Manager.cpp +++ b/src/Manager.cpp @@ -32,14 +32,14 @@ debugMessageCallback(VkDebugReportFlagsEXT /*flags*/, } #endif -Manager::Manager() +kp::Manager::Manager() : Manager(0) { } -Manager::Manager(uint32_t physicalDeviceIndex, - const std::vector& familyQueueIndices, - const std::vector& desiredExtensions) +kp::Manager::Manager(uint32_t physicalDeviceIndex, + const std::vector& familyQueueIndices, + const std::vector& desiredExtensions) { this->mManageResources = true; @@ -53,9 +53,9 @@ Manager::Manager(uint32_t physicalDeviceIndex, familyQueueIndices, physicalDeviceIndex, desiredExtensions); } -Manager::Manager(std::shared_ptr instance, - std::shared_ptr physicalDevice, - std::shared_ptr device) +kp::Manager::Manager(std::shared_ptr instance, + std::shared_ptr physicalDevice, + std::shared_ptr device) { this->mManageResources = false; @@ -69,14 +69,14 @@ Manager::Manager(std::shared_ptr instance, #endif } -Manager::~Manager() +kp::Manager::~Manager() { KP_LOG_DEBUG("Kompute Manager Destructor started"); this->destroy(); } void -Manager::destroy() +kp::Manager::destroy() { KP_LOG_DEBUG("Kompute Manager destroy() started"); @@ -90,8 +90,9 @@ Manager::destroy() if (this->mManageResources && this->mManagedSequences.size()) { KP_LOG_DEBUG("Kompute Manager explicitly running destructor for " "managed sequences"); - for (const std::weak_ptr& weakSq : this->mManagedSequences) { - if (std::shared_ptr sq = weakSq.lock()) { + for (const std::weak_ptr& weakSq : + this->mManagedSequences) { + if (std::shared_ptr sq = weakSq.lock()) { sq->destroy(); } } @@ -100,9 +101,10 @@ Manager::destroy() if (this->mManageResources && this->mManagedAlgorithms.size()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing algorithms"); - for (const std::weak_ptr& weakAlgorithm : + for (const std::weak_ptr& weakAlgorithm : this->mManagedAlgorithms) { - if (std::shared_ptr algorithm = weakAlgorithm.lock()) { + if (std::shared_ptr algorithm = + weakAlgorithm.lock()) { algorithm->destroy(); } } @@ -111,8 +113,9 @@ Manager::destroy() if (this->mManageResources && this->mManagedTensors.size()) { KP_LOG_DEBUG("Kompute Manager explicitly freeing tensors"); - for (const std::weak_ptr& weakTensor : this->mManagedTensors) { - if (std::shared_ptr tensor = weakTensor.lock()) { + for (const std::weak_ptr& weakTensor : + this->mManagedTensors) { + if (std::shared_ptr tensor = weakTensor.lock()) { tensor->destroy(); } } @@ -150,7 +153,7 @@ Manager::destroy() } void -Manager::createInstance() +kp::Manager::createInstance() { KP_LOG_DEBUG("Kompute Manager creating instance"); @@ -285,32 +288,34 @@ Manager::createInstance() } void -Manager::clear() +kp::Manager::clear() { if (this->mManageResources) { this->mManagedTensors.erase( - std::remove_if(begin(this->mManagedTensors), - end(this->mManagedTensors), - [](std::weak_ptr t) { return t.expired(); }), + std::remove_if( + begin(this->mManagedTensors), + end(this->mManagedTensors), + [](std::weak_ptr t) { return t.expired(); }), end(this->mManagedTensors)); this->mManagedAlgorithms.erase( std::remove_if( begin(this->mManagedAlgorithms), end(this->mManagedAlgorithms), - [](std::weak_ptr t) { return t.expired(); }), + [](std::weak_ptr t) { return t.expired(); }), end(this->mManagedAlgorithms)); this->mManagedSequences.erase( - std::remove_if(begin(this->mManagedSequences), - end(this->mManagedSequences), - [](std::weak_ptr t) { return t.expired(); }), + std::remove_if( + begin(this->mManagedSequences), + end(this->mManagedSequences), + [](std::weak_ptr t) { return t.expired(); }), end(this->mManagedSequences)); } } void -Manager::createDevice(const std::vector& familyQueueIndices, - uint32_t physicalDeviceIndex, - const std::vector& desiredExtensions) +kp::Manager::createDevice(const std::vector& familyQueueIndices, + uint32_t physicalDeviceIndex, + const std::vector& desiredExtensions) { KP_LOG_DEBUG("Kompute Manager creating Device"); @@ -456,12 +461,12 @@ Manager::createDevice(const std::vector& familyQueueIndices, KP_LOG_DEBUG("Kompute Manager compute queue obtained"); } -std::shared_ptr -Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps) +std::shared_ptr +kp::Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps) { KP_LOG_DEBUG("Kompute Manager sequence() with queueIndex: {}", queueIndex); - std::shared_ptr sq{ new kp::Sequence( + std::shared_ptr sq{ new kp::Sequence( this->mPhysicalDevice, this->mDevice, this->mComputeQueues[queueIndex], @@ -476,19 +481,19 @@ Manager::sequence(uint32_t queueIndex, uint32_t totalTimestamps) } vk::PhysicalDeviceProperties -Manager::getDeviceProperties() const +kp::Manager::getDeviceProperties() const { return this->mPhysicalDevice->getProperties(); } std::vector -Manager::listDevices() const +kp::Manager::listDevices() const { return this->mInstance->enumeratePhysicalDevices(); } std::shared_ptr -Manager::getVkInstance() const +kp::Manager::getVkInstance() const { return this->mInstance; } diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp index a76fbd58..81c291b6 100644 --- a/src/OpAlgoDispatch.cpp +++ b/src/OpAlgoDispatch.cpp @@ -4,7 +4,7 @@ namespace kp { -OpAlgoDispatch::~OpAlgoDispatch() +kp::OpAlgoDispatch::~OpAlgoDispatch() { KP_LOG_DEBUG("Kompute OpAlgoDispatch destructor started"); @@ -15,12 +15,12 @@ OpAlgoDispatch::~OpAlgoDispatch() } void -OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) +kp::OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpAlgoDispatch record called"); // Barrier to ensure the data is finished writing to buffer memory - for (const std::shared_ptr& tensor : + for (const std::shared_ptr& tensor : this->mAlgorithm->getTensors()) { tensor->recordPrimaryBufferMemoryBarrier( commandBuffer, @@ -43,13 +43,13 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer) } void -OpAlgoDispatch::preEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpAlgoDispatch::preEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpAlgoDispatch preEval called"); } void -OpAlgoDispatch::postEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpAlgoDispatch::postEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpAlgoDispatch postSubmit called"); } diff --git a/src/OpMemoryBarrier.cpp b/src/OpMemoryBarrier.cpp index 1f075a3c..6e80987a 100644 --- a/src/OpMemoryBarrier.cpp +++ b/src/OpMemoryBarrier.cpp @@ -4,8 +4,8 @@ namespace kp { -OpMemoryBarrier::OpMemoryBarrier( - const std::vector>& tensors, +kp::OpMemoryBarrier::OpMemoryBarrier( + const std::vector>& tensors, const vk::AccessFlagBits& srcAccessMask, const vk::AccessFlagBits& dstAccessMask, const vk::PipelineStageFlagBits& srcStageMask, @@ -21,19 +21,19 @@ OpMemoryBarrier::OpMemoryBarrier( KP_LOG_DEBUG("Kompute OpMemoryBarrier constructor"); } -OpMemoryBarrier::~OpMemoryBarrier() +kp::OpMemoryBarrier::~OpMemoryBarrier() { KP_LOG_DEBUG("Kompute OpMemoryBarrier destructor started"); } void -OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer) +kp::OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpMemoryBarrier record called"); // Barrier to ensure the data is finished writing to buffer memory if (this->mBarrierOnPrimary) { - for (const std::shared_ptr& tensor : this->mTensors) { + for (const std::shared_ptr& tensor : this->mTensors) { tensor->recordPrimaryBufferMemoryBarrier(commandBuffer, this->mSrcAccessMask, this->mDstAccessMask, @@ -41,7 +41,7 @@ OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer) this->mDstStageMask); } } else { - for (const std::shared_ptr& tensor : this->mTensors) { + for (const std::shared_ptr& tensor : this->mTensors) { tensor->recordStagingBufferMemoryBarrier(commandBuffer, this->mSrcAccessMask, this->mDstAccessMask, @@ -52,13 +52,13 @@ OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer) } void -OpMemoryBarrier::preEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpMemoryBarrier::preEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpMemoryBarrier preEval called"); } void -OpMemoryBarrier::postEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpMemoryBarrier::postEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpMemoryBarrier postSubmit called"); } diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp index 04336af8..9b9be534 100644 --- a/src/OpTensorCopy.cpp +++ b/src/OpTensorCopy.cpp @@ -5,7 +5,8 @@ namespace kp { -OpTensorCopy::OpTensorCopy(const std::vector>& tensors) +kp::OpTensorCopy::OpTensorCopy( + const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params"); @@ -18,12 +19,12 @@ OpTensorCopy::OpTensorCopy(const std::vector>& tensors) kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType(); uint32_t size = this->mTensors[0]->size(); - for (const std::shared_ptr& tensor : tensors) { + for (const std::shared_ptr& tensor : tensors) { if (tensor->dataType() != dataType) { throw std::runtime_error(fmt::format( "Attempting to copy tensors of different types from {} to {}", - Tensor::toString(dataType), - Tensor::toString(tensor->dataType()))); + kp::Tensor::toString(dataType), + kp::Tensor::toString(tensor->dataType()))); } if (tensor->size() != size) { throw std::runtime_error(fmt::format( @@ -34,13 +35,13 @@ OpTensorCopy::OpTensorCopy(const std::vector>& tensors) } } -OpTensorCopy::~OpTensorCopy() +kp::OpTensorCopy::~OpTensorCopy() { KP_LOG_DEBUG("Kompute OpTensorCopy destructor started"); } void -OpTensorCopy::record(const vk::CommandBuffer& commandBuffer) +kp::OpTensorCopy::record(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorCopy record called"); @@ -51,13 +52,13 @@ OpTensorCopy::record(const vk::CommandBuffer& commandBuffer) } void -OpTensorCopy::preEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpTensorCopy::preEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpTensorCopy preEval called"); } void -OpTensorCopy::postEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpTensorCopy::postEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpTensorCopy postEval called"); diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp index a2542357..00cee2c1 100644 --- a/src/OpTensorSyncDevice.cpp +++ b/src/OpTensorSyncDevice.cpp @@ -4,8 +4,8 @@ namespace kp { -OpTensorSyncDevice::OpTensorSyncDevice( - const std::vector>& tensors) +kp::OpTensorSyncDevice::OpTensorSyncDevice( + const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params"); @@ -17,7 +17,7 @@ OpTensorSyncDevice::OpTensorSyncDevice( this->mTensors = tensors; } -OpTensorSyncDevice::~OpTensorSyncDevice() +kp::OpTensorSyncDevice::~OpTensorSyncDevice() { KP_LOG_DEBUG("Kompute OpTensorSyncDevice destructor started"); @@ -25,25 +25,26 @@ OpTensorSyncDevice::~OpTensorSyncDevice() } void -OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer) +kp::OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called"); for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + if (this->mTensors[i]->tensorType() == + kp::Tensor::TensorTypes::eDevice) { this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer); } } } void -OpTensorSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpTensorSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice preEval called"); } void -OpTensorSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpTensorSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice postEval called"); } diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 7818db56..02612130 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -6,8 +6,8 @@ namespace kp { -OpTensorSyncLocal::OpTensorSyncLocal( - const std::vector>& tensors) +kp::OpTensorSyncLocal::OpTensorSyncLocal( + const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); @@ -19,18 +19,19 @@ OpTensorSyncLocal::OpTensorSyncLocal( this->mTensors = tensors; } -OpTensorSyncLocal::~OpTensorSyncLocal() +kp::OpTensorSyncLocal::~OpTensorSyncLocal() { KP_LOG_DEBUG("Kompute OpTensorSyncLocal destructor started"); } void -OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer) +kp::OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called"); for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + if (this->mTensors[i]->tensorType() == + kp::Tensor::TensorTypes::eDevice) { this->mTensors[i]->recordPrimaryBufferMemoryBarrier( commandBuffer, @@ -52,13 +53,13 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer) } void -OpTensorSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpTensorSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal preEval called"); } void -OpTensorSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/) +kp::OpTensorSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal postEval called"); diff --git a/src/Sequence.cpp b/src/Sequence.cpp index d5d127ff..68a623b8 100644 --- a/src/Sequence.cpp +++ b/src/Sequence.cpp @@ -4,11 +4,11 @@ namespace kp { -Sequence::Sequence(std::shared_ptr physicalDevice, - std::shared_ptr device, - std::shared_ptr computeQueue, - uint32_t queueIndex, - uint32_t totalTimestamps) +kp::Sequence::Sequence(std::shared_ptr physicalDevice, + std::shared_ptr device, + std::shared_ptr computeQueue, + uint32_t queueIndex, + uint32_t totalTimestamps) { KP_LOG_DEBUG("Kompute Sequence Constructor with existing device & queue"); @@ -25,7 +25,7 @@ Sequence::Sequence(std::shared_ptr physicalDevice, 1); //+1 for the first one } -Sequence::~Sequence() +kp::Sequence::~Sequence() { KP_LOG_DEBUG("Kompute Sequence Destructor started"); @@ -35,7 +35,7 @@ Sequence::~Sequence() } void -Sequence::begin() +kp::Sequence::begin() { KP_LOG_DEBUG("Kompute sequence called BEGIN"); @@ -62,7 +62,7 @@ Sequence::begin() } void -Sequence::end() +kp::Sequence::end() { KP_LOG_DEBUG("Kompute Sequence calling END"); @@ -82,7 +82,7 @@ Sequence::end() } void -Sequence::clear() +kp::Sequence::clear() { KP_LOG_DEBUG("Kompute Sequence calling clear"); this->mOperations.clear(); @@ -91,23 +91,23 @@ Sequence::clear() } } -std::shared_ptr -Sequence::eval() +std::shared_ptr +kp::Sequence::eval() { KP_LOG_DEBUG("Kompute sequence EVAL BEGIN"); return this->evalAsync()->evalAwait(); } -std::shared_ptr -Sequence::eval(std::shared_ptr op) +std::shared_ptr +kp::Sequence::eval(std::shared_ptr op) { this->clear(); return this->record(op)->eval(); } -std::shared_ptr -Sequence::evalAsync() +std::shared_ptr +kp::Sequence::evalAsync() { if (this->isRecording()) { this->end(); @@ -138,8 +138,8 @@ Sequence::evalAsync() return shared_from_this(); } -std::shared_ptr -Sequence::evalAsync(std::shared_ptr op) +std::shared_ptr +kp::Sequence::evalAsync(std::shared_ptr op) { this->clear(); this->record(op); @@ -147,8 +147,8 @@ Sequence::evalAsync(std::shared_ptr op) return shared_from_this(); } -std::shared_ptr -Sequence::evalAwait(uint64_t waitFor) +std::shared_ptr +kp::Sequence::evalAwait(uint64_t waitFor) { if (!this->mIsRunning) { KP_LOG_WARN("Kompute Sequence evalAwait called without existing eval"); @@ -174,29 +174,29 @@ Sequence::evalAwait(uint64_t waitFor) } bool -Sequence::isRunning() const +kp::Sequence::isRunning() const { return this->mIsRunning; } bool -Sequence::isRecording() const +kp::Sequence::isRecording() const { return this->mRecording; } bool -Sequence::isInit() const +kp::Sequence::isInit() const { return this->mDevice && this->mCommandPool && this->mCommandBuffer && this->mComputeQueue; } void -Sequence::rerecord() +kp::Sequence::rerecord() { this->end(); - std::vector> ops = this->mOperations; + std::vector> ops = this->mOperations; this->mOperations.clear(); for (const std::shared_ptr& op : ops) { this->record(op); @@ -204,7 +204,7 @@ Sequence::rerecord() } void -Sequence::destroy() +kp::Sequence::destroy() { KP_LOG_DEBUG("Kompute Sequence destroy called"); @@ -278,8 +278,8 @@ Sequence::destroy() } } -std::shared_ptr -Sequence::record(std::shared_ptr op) +std::shared_ptr +kp::Sequence::record(std::shared_ptr op) { KP_LOG_DEBUG("Kompute Sequence record function started"); @@ -302,7 +302,7 @@ Sequence::record(std::shared_ptr op) } void -Sequence::createCommandPool() +kp::Sequence::createCommandPool() { KP_LOG_DEBUG("Kompute Sequence creating command pool"); @@ -321,7 +321,7 @@ Sequence::createCommandPool() } void -Sequence::createCommandBuffer() +kp::Sequence::createCommandBuffer() { KP_LOG_DEBUG("Kompute Sequence creating command buffer"); if (!this->mDevice) { @@ -343,7 +343,7 @@ Sequence::createCommandBuffer() } void -Sequence::createTimestampQueryPool(uint32_t totalTimestamps) +kp::Sequence::createTimestampQueryPool(uint32_t totalTimestamps) { KP_LOG_DEBUG("Kompute Sequence creating query pool"); if (!this->isInit()) { @@ -371,7 +371,7 @@ Sequence::createTimestampQueryPool(uint32_t totalTimestamps) } std::vector -Sequence::getTimestamps() +kp::Sequence::getTimestamps() { if (!this->timestampQueryPool) throw std::runtime_error("Timestamp latching not enabled"); diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 5a906578..f1ab5e48 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -5,18 +5,18 @@ namespace kp { std::string -Tensor::toString(Tensor::TensorDataTypes dt) +kp::Tensor::toString(kp::Tensor::TensorDataTypes dt) { switch (dt) { - case TensorDataTypes::eBool: + case kp::Tensor::TensorDataTypes::eBool: return "eBool"; - case TensorDataTypes::eInt: + case kp::Tensor::TensorDataTypes::eInt: return "eInt"; - case TensorDataTypes::eUnsignedInt: + case kp::Tensor::TensorDataTypes::eUnsignedInt: return "eUnsignedInt"; - case TensorDataTypes::eFloat: + case kp::Tensor::TensorDataTypes::eFloat: return "eFloat"; - case TensorDataTypes::eDouble: + case kp::Tensor::TensorDataTypes::eDouble: return "eDouble"; default: return "unknown"; @@ -24,31 +24,31 @@ Tensor::toString(Tensor::TensorDataTypes dt) } std::string -Tensor::toString(Tensor::TensorTypes dt) +kp::Tensor::toString(kp::Tensor::TensorTypes dt) { switch (dt) { - case TensorTypes::eDevice: + case kp::Tensor::TensorTypes::eDevice: return "eDevice"; - case TensorTypes::eHost: + case kp::Tensor::TensorTypes::eHost: return "eHost"; - case TensorTypes::eStorage: + case kp::Tensor::TensorTypes::eStorage: return "eStorage"; default: return "unknown"; } } -Tensor::Tensor(std::shared_ptr physicalDevice, - std::shared_ptr device, - void* data, - uint32_t elementTotalCount, - uint32_t elementMemorySize, - const TensorDataTypes& dataType, - const TensorTypes& tensorType) +kp::Tensor::Tensor(std::shared_ptr physicalDevice, + std::shared_ptr device, + void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize, + const TensorDataTypes& dataType, + const TensorTypes& tensorType) { KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", elementTotalCount, - Tensor::toString(tensorType)); + kp::Tensor::toString(tensorType)); this->mPhysicalDevice = physicalDevice; this->mDevice = device; @@ -58,10 +58,10 @@ Tensor::Tensor(std::shared_ptr physicalDevice, this->rebuild(data, elementTotalCount, elementMemorySize); } -Tensor::~Tensor() +kp::Tensor::~Tensor() { KP_LOG_DEBUG("Kompute Tensor destructor started. Type: {}", - Tensor::toString(this->tensorType())); + kp::Tensor::toString(this->tensorType())); if (this->mDevice) { this->destroy(); @@ -71,9 +71,9 @@ Tensor::~Tensor() } void -Tensor::rebuild(void* data, - uint32_t elementTotalCount, - uint32_t elementMemorySize) +kp::Tensor::rebuild(void* data, + uint32_t elementTotalCount, + uint32_t elementMemorySize) { KP_LOG_DEBUG("Kompute Tensor rebuilding with size {}", elementTotalCount); @@ -88,72 +88,72 @@ Tensor::rebuild(void* data, this->allocateMemoryCreateGPUResources(); - if (this->tensorType() != Tensor::TensorTypes::eStorage) { + if (this->tensorType() != kp::Tensor::TensorTypes::eStorage) { this->mapRawData(); memcpy(this->mRawData, data, this->memorySize()); } } -Tensor::TensorTypes -Tensor::tensorType() +kp::Tensor::TensorTypes +kp::Tensor::tensorType() { return this->mTensorType; } bool -Tensor::isInit() +kp::Tensor::isInit() { return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory && this->mRawData; } uint32_t -Tensor::size() +kp::Tensor::size() { return this->mSize; } uint32_t -Tensor::dataTypeMemorySize() +kp::Tensor::dataTypeMemorySize() { return this->mDataTypeMemorySize; } uint32_t -Tensor::memorySize() +kp::Tensor::memorySize() { return this->mSize * this->mDataTypeMemorySize; } kp::Tensor::TensorDataTypes -Tensor::dataType() +kp::Tensor::dataType() { return this->mDataType; } void* -Tensor::rawData() +kp::Tensor::rawData() { return this->mRawData; } void -Tensor::setRawData(const void* data) +kp::Tensor::setRawData(const void* data) { memcpy(this->mRawData, data, this->memorySize()); } void -Tensor::mapRawData() +kp::Tensor::mapRawData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); std::shared_ptr hostVisibleMemory = nullptr; - if (this->mTensorType == TensorTypes::eHost) { + if (this->mTensorType == kp::Tensor::TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { + } else if (this->mTensorType == kp::Tensor::TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { KP_LOG_WARN("Kompute Tensor mapping data not supported on {} tensor", @@ -170,16 +170,16 @@ Tensor::mapRawData() } void -Tensor::unmapRawData() +kp::Tensor::unmapRawData() { KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); std::shared_ptr hostVisibleMemory = nullptr; - if (this->mTensorType == TensorTypes::eHost) { + if (this->mTensorType == kp::Tensor::TensorTypes::eHost) { hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { + } else if (this->mTensorType == kp::Tensor::TensorTypes::eDevice) { hostVisibleMemory = this->mStagingMemory; } else { KP_LOG_WARN("Kompute Tensor mapping data not supported on {} tensor", @@ -194,8 +194,8 @@ Tensor::unmapRawData() } void -Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, - std::shared_ptr copyFromTensor) +kp::Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, + std::shared_ptr copyFromTensor) { vk::DeviceSize bufferSize(this->memorySize()); @@ -211,7 +211,8 @@ Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, } void -Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer) +kp::Tensor::recordCopyFromStagingToDevice( + const vk::CommandBuffer& commandBuffer) { vk::DeviceSize bufferSize(this->memorySize()); vk::BufferCopy copyRegion(0, 0, bufferSize); @@ -226,7 +227,8 @@ Tensor::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer) } void -Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer) +kp::Tensor::recordCopyFromDeviceToStaging( + const vk::CommandBuffer& commandBuffer) { vk::DeviceSize bufferSize(this->memorySize()); vk::BufferCopy copyRegion(0, 0, bufferSize); @@ -241,22 +243,23 @@ Tensor::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer) } void -Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer, - std::shared_ptr bufferFrom, - std::shared_ptr bufferTo, - vk::DeviceSize /*bufferSize*/, - vk::BufferCopy copyRegion) +kp::Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer, + std::shared_ptr bufferFrom, + std::shared_ptr bufferTo, + vk::DeviceSize /*bufferSize*/, + vk::BufferCopy copyRegion) { commandBuffer.copyBuffer(*bufferFrom, *bufferTo, copyRegion); } void -Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, - vk::AccessFlagBits srcAccessMask, - vk::AccessFlagBits dstAccessMask, - vk::PipelineStageFlagBits srcStageMask, - vk::PipelineStageFlagBits dstStageMask) +kp::Tensor::recordPrimaryBufferMemoryBarrier( + const vk::CommandBuffer& commandBuffer, + vk::AccessFlagBits srcAccessMask, + vk::AccessFlagBits dstAccessMask, + vk::PipelineStageFlagBits srcStageMask, + vk::PipelineStageFlagBits dstStageMask) { KP_LOG_DEBUG("Kompute Tensor recording PRIMARY buffer memory barrier"); @@ -269,11 +272,12 @@ Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, } void -Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, - vk::AccessFlagBits srcAccessMask, - vk::AccessFlagBits dstAccessMask, - vk::PipelineStageFlagBits srcStageMask, - vk::PipelineStageFlagBits dstStageMask) +kp::Tensor::recordStagingBufferMemoryBarrier( + const vk::CommandBuffer& commandBuffer, + vk::AccessFlagBits srcAccessMask, + vk::AccessFlagBits dstAccessMask, + vk::PipelineStageFlagBits srcStageMask, + vk::PipelineStageFlagBits dstStageMask) { KP_LOG_DEBUG("Kompute Tensor recording STAGING buffer memory barrier"); @@ -286,12 +290,12 @@ Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, } void -Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, - const vk::Buffer& buffer, - vk::AccessFlagBits srcAccessMask, - vk::AccessFlagBits dstAccessMask, - vk::PipelineStageFlagBits srcStageMask, - vk::PipelineStageFlagBits dstStageMask) +kp::Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, + const vk::Buffer& buffer, + vk::AccessFlagBits srcAccessMask, + vk::AccessFlagBits dstAccessMask, + vk::PipelineStageFlagBits srcStageMask, + vk::PipelineStageFlagBits dstStageMask) { KP_LOG_DEBUG("Kompute Tensor recording buffer memory barrier"); @@ -314,7 +318,7 @@ Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, } vk::DescriptorBufferInfo -Tensor::constructDescriptorBufferInfo() +kp::Tensor::constructDescriptorBufferInfo() { KP_LOG_DEBUG("Kompute Tensor construct descriptor buffer info size {}", this->memorySize()); @@ -325,20 +329,20 @@ Tensor::constructDescriptorBufferInfo() } vk::BufferUsageFlags -Tensor::getPrimaryBufferUsageFlags() +kp::Tensor::getPrimaryBufferUsageFlags() { switch (this->mTensorType) { - case TensorTypes::eDevice: + case kp::Tensor::TensorTypes::eDevice: return vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst; break; - case TensorTypes::eHost: + case kp::Tensor::TensorTypes::eHost: return vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst; break; - case TensorTypes::eStorage: + case kp::Tensor::TensorTypes::eStorage: return vk::BufferUsageFlagBits::eStorageBuffer; break; default: @@ -347,17 +351,17 @@ Tensor::getPrimaryBufferUsageFlags() } vk::MemoryPropertyFlags -Tensor::getPrimaryMemoryPropertyFlags() +kp::Tensor::getPrimaryMemoryPropertyFlags() { switch (this->mTensorType) { - case TensorTypes::eDevice: + case kp::Tensor::TensorTypes::eDevice: return vk::MemoryPropertyFlagBits::eDeviceLocal; break; - case TensorTypes::eHost: + case kp::Tensor::TensorTypes::eHost: return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; break; - case TensorTypes::eStorage: + case kp::Tensor::TensorTypes::eStorage: return vk::MemoryPropertyFlagBits::eDeviceLocal; break; default: @@ -366,10 +370,10 @@ Tensor::getPrimaryMemoryPropertyFlags() } vk::BufferUsageFlags -Tensor::getStagingBufferUsageFlags() +kp::Tensor::getStagingBufferUsageFlags() { switch (this->mTensorType) { - case TensorTypes::eDevice: + case kp::Tensor::TensorTypes::eDevice: return vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst; break; @@ -379,10 +383,10 @@ Tensor::getStagingBufferUsageFlags() } vk::MemoryPropertyFlags -Tensor::getStagingMemoryPropertyFlags() +kp::Tensor::getStagingMemoryPropertyFlags() { switch (this->mTensorType) { - case TensorTypes::eDevice: + case kp::Tensor::TensorTypes::eDevice: return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; break; @@ -392,7 +396,7 @@ Tensor::getStagingMemoryPropertyFlags() } void -Tensor::allocateMemoryCreateGPUResources() +kp::Tensor::allocateMemoryCreateGPUResources() { KP_LOG_DEBUG("Kompute Tensor creating buffer"); @@ -415,7 +419,7 @@ Tensor::allocateMemoryCreateGPUResources() this->getPrimaryMemoryPropertyFlags()); this->mFreePrimaryMemory = true; - if (this->mTensorType == TensorTypes::eDevice) { + if (this->mTensorType == kp::Tensor::TensorTypes::eDevice) { KP_LOG_DEBUG("Kompute Tensor creating staging buffer and memory"); this->mStagingBuffer = std::make_shared(); @@ -433,8 +437,8 @@ Tensor::allocateMemoryCreateGPUResources() } void -Tensor::createBuffer(std::shared_ptr buffer, - vk::BufferUsageFlags bufferUsageFlags) +kp::Tensor::createBuffer(std::shared_ptr buffer, + vk::BufferUsageFlags bufferUsageFlags) { vk::DeviceSize bufferSize = this->memorySize(); @@ -459,9 +463,9 @@ Tensor::createBuffer(std::shared_ptr buffer, } void -Tensor::allocateBindMemory(std::shared_ptr buffer, - std::shared_ptr memory, - vk::MemoryPropertyFlags memoryPropertyFlags) +kp::Tensor::allocateBindMemory(std::shared_ptr buffer, + std::shared_ptr memory, + vk::MemoryPropertyFlags memoryPropertyFlags) { KP_LOG_DEBUG("Kompute Tensor allocating and binding memory"); @@ -504,7 +508,7 @@ Tensor::allocateBindMemory(std::shared_ptr buffer, } void -Tensor::destroy() +kp::Tensor::destroy() { KP_LOG_DEBUG("Kompute Tensor started destroy()"); @@ -521,7 +525,7 @@ Tensor::destroy() } // Unmap the current memory data - if (this->tensorType() != Tensor::TensorTypes::eStorage) { + if (this->tensorType() != kp::Tensor::TensorTypes::eStorage) { this->unmapRawData(); } @@ -589,38 +593,38 @@ Tensor::destroy() } template<> -Tensor::TensorDataTypes -TensorT::dataType() +kp::Tensor::TensorDataTypes +kp::TensorT::dataType() { - return Tensor::TensorDataTypes::eBool; + return kp::Tensor::TensorDataTypes::eBool; } template<> -Tensor::TensorDataTypes -TensorT::dataType() +kp::Tensor::TensorDataTypes +kp::TensorT::dataType() { - return Tensor::TensorDataTypes::eInt; + return kp::Tensor::TensorDataTypes::eInt; } template<> -Tensor::TensorDataTypes -TensorT::dataType() +kp::Tensor::TensorDataTypes +kp::TensorT::dataType() { - return Tensor::TensorDataTypes::eUnsignedInt; + return kp::Tensor::TensorDataTypes::eUnsignedInt; } template<> -Tensor::TensorDataTypes -TensorT::dataType() +kp::Tensor::TensorDataTypes +kp::TensorT::dataType() { - return Tensor::TensorDataTypes::eFloat; + return kp::Tensor::TensorDataTypes::eFloat; } template<> -Tensor::TensorDataTypes -TensorT::dataType() +kp::Tensor::TensorDataTypes +kp::TensorT::dataType() { - return Tensor::TensorDataTypes::eDouble; + return kp::Tensor::TensorDataTypes::eDouble; } } diff --git a/src/include/kompute/Algorithm.hpp b/src/include/kompute/Algorithm.hpp index 1917dd37..b364653a 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -36,7 +36,7 @@ class Algorithm */ template Algorithm(std::shared_ptr device, - const std::vector>& tensors = {}, + const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, const std::vector& specializationConstants = {}, @@ -81,7 +81,7 @@ class Algorithm * as this initial value. */ template - void rebuild(const std::vector>& tensors, + void rebuild(const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup = {}, const std::vector& specializationConstants = {}, @@ -271,14 +271,14 @@ class Algorithm * * @returns The list of tensors used in the algorithm. */ - const std::vector>& getTensors(); + const std::vector>& getTensors(); void destroy(); private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; - std::vector> mTensors; + std::vector> mTensors; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mDescriptorSetLayout; diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 52f9ada7..2667c1ce 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -67,8 +67,8 @@ class Manager * If zero (default), disables latching of timestamps. * @returns Shared pointer with initialised sequence */ - std::shared_ptr sequence(uint32_t queueIndex = 0, - uint32_t totalTimestamps = 0); + std::shared_ptr sequence(uint32_t queueIndex = 0, + uint32_t totalTimestamps = 0); /** * Create a managed tensor that will be destroyed by this manager @@ -79,13 +79,13 @@ class Manager * @returns Shared pointer with initialised tensor */ template - std::shared_ptr> tensorT( + std::shared_ptr> tensorT( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + kp::Tensor::TensorTypes tensorType = kp::Tensor::TensorTypes::eDevice) { KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); - std::shared_ptr> tensor{ new kp::TensorT( + std::shared_ptr> tensor{ new kp::TensorT( this->mPhysicalDevice, this->mDevice, data, tensorType) }; if (this->mManageResources) { @@ -95,27 +95,28 @@ class Manager return tensor; } - std::shared_ptr> tensor( + std::shared_ptr> tensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + kp::Tensor::TensorTypes tensorType = kp::Tensor::TensorTypes::eDevice) { return this->tensorT(data, tensorType); } - std::shared_ptr tensor( + std::shared_ptr tensor( void* data, uint32_t elementTotalCount, uint32_t elementMemorySize, - const Tensor::TensorDataTypes& dataType, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + const kp::Tensor::TensorDataTypes& dataType, + kp::Tensor::TensorTypes tensorType = kp::Tensor::TensorTypes::eDevice) { - std::shared_ptr tensor{ new kp::Tensor(this->mPhysicalDevice, - this->mDevice, - data, - elementTotalCount, - elementMemorySize, - dataType, - tensorType) }; + std::shared_ptr tensor{ new kp::Tensor( + this->mPhysicalDevice, + this->mDevice, + data, + elementTotalCount, + elementMemorySize, + dataType, + tensorType) }; if (this->mManageResources) { this->mManagedTensors.push_back(tensor); @@ -139,8 +140,8 @@ class Manager * and defaults to an empty constant * @returns Shared pointer with initialised algorithm */ - std::shared_ptr algorithm( - const std::vector>& tensors = {}, + std::shared_ptr algorithm( + const std::vector>& tensors = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, const std::vector& specializationConstants = {}, @@ -165,8 +166,8 @@ class Manager * @returns Shared pointer with initialised algorithm */ template - std::shared_ptr algorithm( - const std::vector>& tensors, + std::shared_ptr algorithm( + const std::vector>& tensors, const std::vector& spirv, const Workgroup& workgroup, const std::vector& specializationConstants, @@ -175,7 +176,7 @@ class Manager KP_LOG_DEBUG("Kompute Manager algorithm creation triggered"); - std::shared_ptr algorithm{ new kp::Algorithm( + std::shared_ptr algorithm{ new kp::Algorithm( this->mDevice, tensors, spirv, @@ -232,9 +233,9 @@ class Manager bool mFreeDevice = false; // -------------- ALWAYS OWNED RESOURCES - std::vector> mManagedTensors; - std::vector> mManagedSequences; - std::vector> mManagedAlgorithms; + std::vector> mManagedTensors; + std::vector> mManagedSequences; + std::vector> mManagedAlgorithms; std::vector mComputeQueueFamilyIndices; std::vector> mComputeQueues; diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index de9b9f69..7278dbd3 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -11,7 +11,7 @@ namespace kp { /** * Container of operations that can be sent to GPU as batch */ -class Sequence : public std::enable_shared_from_this +class Sequence : public std::enable_shared_from_this { public: /** @@ -43,9 +43,9 @@ class Sequence : public std::enable_shared_from_this * * @param op Object derived from kp::BaseOp that will be recoreded by the * sequence which will be used when the operation is evaluated. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ - std::shared_ptr record(std::shared_ptr op); + std::shared_ptr record(std::shared_ptr op); /** * Record function for operation to be added to the GPU queue in batch. This @@ -56,11 +56,11 @@ class Sequence : public std::enable_shared_from_this * @param tensors Vector of tensors to use for the operation * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr record( - std::vector> tensors, + std::shared_ptr record( + std::vector> tensors, TArgs&&... params) { std::shared_ptr op{ new T(tensors, std::forward(params)...) }; @@ -76,11 +76,12 @@ class Sequence : public std::enable_shared_from_this * operations * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr record(std::shared_ptr algorithm, - TArgs&&... params) + std::shared_ptr record( + std::shared_ptr algorithm, + TArgs&&... params) { std::shared_ptr op{ new T(algorithm, std::forward(params)...) }; @@ -91,18 +92,18 @@ class Sequence : public std::enable_shared_from_this * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job synchronously (with a barrier). * - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ - std::shared_ptr eval(); + std::shared_ptr eval(); /** * Resets all the recorded and stored operations, records the operation * provided and submits into the gpu as a submit job synchronously (with a * barrier). * - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ - std::shared_ptr eval(std::shared_ptr op); + std::shared_ptr eval(std::shared_ptr op); /** * Eval sends all the recorded and stored operations in the vector of @@ -111,11 +112,12 @@ class Sequence : public std::enable_shared_from_this * @param tensors Vector of tensors to use for the operation * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr eval(std::vector> tensors, - TArgs&&... params) + std::shared_ptr eval( + std::vector> tensors, + TArgs&&... params) { std::shared_ptr op{ new T(tensors, std::forward(params)...) }; return this->eval(op); @@ -128,11 +130,11 @@ class Sequence : public std::enable_shared_from_this * operations * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr eval(std::shared_ptr algorithm, - TArgs&&... params) + std::shared_ptr eval(std::shared_ptr algorithm, + TArgs&&... params) { std::shared_ptr op{ new T(algorithm, std::forward(params)...) }; @@ -147,7 +149,7 @@ class Sequence : public std::enable_shared_from_this * * @return Boolean stating whether execution was successful. */ - std::shared_ptr evalAsync(); + std::shared_ptr evalAsync(); /** * Clears currnet operations to record provided one in the vector of * operations into the gpu as a submit job without a barrier. EvalAwait() @@ -156,7 +158,7 @@ class Sequence : public std::enable_shared_from_this * * @return Boolean stating whether execution was successful. */ - std::shared_ptr evalAsync(std::shared_ptr op); + std::shared_ptr evalAsync(std::shared_ptr op); /** * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job with a barrier. @@ -164,11 +166,11 @@ class Sequence : public std::enable_shared_from_this * @param tensors Vector of tensors to use for the operation * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr evalAsync( - std::vector> tensors, + std::shared_ptr evalAsync( + std::vector> tensors, TArgs&&... params) { std::shared_ptr op{ new T(tensors, std::forward(params)...) }; @@ -182,11 +184,12 @@ class Sequence : public std::enable_shared_from_this * operations * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr evalAsync(std::shared_ptr algorithm, - TArgs&&... params) + std::shared_ptr evalAsync( + std::shared_ptr algorithm, + TArgs&&... params) { std::shared_ptr op{ new T(algorithm, std::forward(params)...) }; @@ -198,9 +201,9 @@ class Sequence : public std::enable_shared_from_this * finishes, it runs the postEval of all operations. * * @param waitFor Number of milliseconds to wait before timing out. - * @return shared_ptr of the Sequence class itself + * @return shared_ptr of the Sequence class itself */ - std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); + std::shared_ptr evalAwait(uint64_t waitFor = UINT64_MAX); /** * Clear function clears all operations currently recorded and starts @@ -277,7 +280,7 @@ class Sequence : public std::enable_shared_from_this // -------------- ALWAYS OWNED RESOURCES vk::Fence mFence; - std::vector> mOperations{}; + std::vector> mOperations{}; std::shared_ptr timestampQueryPool = nullptr; // State diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index e39cc64c..973d6fca 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -60,7 +60,7 @@ class Tensor uint32_t elementTotalCount, uint32_t elementMemorySize, const TensorDataTypes& dataType, - const TensorTypes& tensorType = TensorTypes::eDevice); + const TensorTypes& tensorType = kp::Tensor::TensorTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they @@ -107,7 +107,7 @@ class Tensor * @param copyFromTensor Tensor to copy the data from */ void recordCopyFrom(const vk::CommandBuffer& commandBuffer, - std::shared_ptr copyFromTensor); + std::shared_ptr copyFromTensor); /** * Records a copy from the internal staging memory to the device memory @@ -304,7 +304,7 @@ class TensorT : public Tensor TensorT(std::shared_ptr physicalDevice, std::shared_ptr device, const std::vector& data, - const TensorTypes& tensorType = TensorTypes::eDevice) + const TensorTypes& tensorType = kp::Tensor::TensorTypes::eDevice) : Tensor(physicalDevice, device, (void*)data.data(), @@ -339,7 +339,7 @@ class TensorT : public Tensor "Kompute TensorT Cannot set data of different sizes"); } - Tensor::setRawData(data.data()); + kp::Tensor::setRawData(data.data()); } TensorDataTypes dataType(); diff --git a/src/include/kompute/operations/OpAlgoDispatch.hpp b/src/include/kompute/operations/OpAlgoDispatch.hpp index e91598f0..f1b9662c 100644 --- a/src/include/kompute/operations/OpAlgoDispatch.hpp +++ b/src/include/kompute/operations/OpAlgoDispatch.hpp @@ -77,7 +77,7 @@ class OpAlgoDispatch : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::shared_ptr mAlgorithm; + std::shared_ptr mAlgorithm; void* mPushConstantsData = nullptr; uint32_t mPushConstantsDataTypeMemorySize = 0; uint32_t mPushConstantsSize = 0; diff --git a/src/include/kompute/operations/OpMemoryBarrier.hpp b/src/include/kompute/operations/OpMemoryBarrier.hpp index 4a232232..efe00827 100644 --- a/src/include/kompute/operations/OpMemoryBarrier.hpp +++ b/src/include/kompute/operations/OpMemoryBarrier.hpp @@ -34,7 +34,7 @@ class OpMemoryBarrier : public OpBase * @param barrierOnPrimary Boolean to select primary or secondary buffers on * tensors */ - OpMemoryBarrier(const std::vector>& tensors, + OpMemoryBarrier(const std::vector>& tensors, const vk::AccessFlagBits& srcAccessMask, const vk::AccessFlagBits& dstAccessMask, const vk::PipelineStageFlagBits& srcStageMask, @@ -75,7 +75,7 @@ class OpMemoryBarrier : public OpBase const vk::PipelineStageFlagBits mSrcStageMask; const vk::PipelineStageFlagBits mDstStageMask; const bool mBarrierOnPrimary; - const std::vector> mTensors; + const std::vector> mTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index f75ccc4f..4b52c962 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -30,8 +30,8 @@ class OpMult : public OpAlgoDispatch * @param algorithm An algorithm that will be overridden with the OpMult * shader data and the tensors provided which are expected to be 3 */ - OpMult(std::vector> tensors, - std::shared_ptr algorithm) + OpMult(std::vector> tensors, + std::shared_ptr algorithm) : OpAlgoDispatch(algorithm) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); diff --git a/src/include/kompute/operations/OpTensorCopy.hpp b/src/include/kompute/operations/OpTensorCopy.hpp index 968c1065..d8d5e07b 100644 --- a/src/include/kompute/operations/OpTensorCopy.hpp +++ b/src/include/kompute/operations/OpTensorCopy.hpp @@ -24,7 +24,7 @@ class OpTensorCopy : public OpBase * * @param tensors Tensors that will be used to create in operation. */ - OpTensorCopy(const std::vector>& tensors); + OpTensorCopy(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be @@ -57,7 +57,7 @@ class OpTensorCopy : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::vector> mTensors; + std::vector> mTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncDevice.hpp b/src/include/kompute/operations/OpTensorSyncDevice.hpp index 3a1792ac..0146288b 100644 --- a/src/include/kompute/operations/OpTensorSyncDevice.hpp +++ b/src/include/kompute/operations/OpTensorSyncDevice.hpp @@ -9,11 +9,11 @@ namespace kp { /** * Operation that syncs tensor's device by mapping local data into the device - * memory. For TensorTypes::eDevice it will use a record operation for the - * memory to be syncd into GPU memory which means that the operation will be - * done in sync with GPU commands. For TensorTypes::eHost it will only map the - * data into host memory which will happen during preEval before the recorded - * commands are dispatched. + * memory. For kp::Tensor::TensorTypes::eDevice it will use a record operation + * for the memory to be syncd into GPU memory which means that the operation + * will be done in sync with GPU commands. For kp::Tensor::TensorTypes::eHost it + * will only map the data into host memory which will happen during preEval + * before the recorded commands are dispatched. */ class OpTensorSyncDevice : public OpBase { @@ -21,11 +21,11 @@ class OpTensorSyncDevice : public OpBase /** * Default constructor with parameters that provides the core vulkan * resources and the tensors that will be used in the operation. The tensos - * provided cannot be of type TensorTypes::eStorage. + * provided cannot be of type kp::Tensor::TensorTypes::eStorage. * * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncDevice(const std::vector>& tensors); + OpTensorSyncDevice(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be @@ -57,7 +57,7 @@ class OpTensorSyncDevice : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::vector> mTensors; + std::vector> mTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncLocal.hpp b/src/include/kompute/operations/OpTensorSyncLocal.hpp index 4216003e..45889f06 100644 --- a/src/include/kompute/operations/OpTensorSyncLocal.hpp +++ b/src/include/kompute/operations/OpTensorSyncLocal.hpp @@ -11,11 +11,11 @@ namespace kp { /** * Operation that syncs tensor's local memory by mapping device data into the - * local CPU memory. For TensorTypes::eDevice it will use a record operation - * for the memory to be syncd into GPU memory which means that the operation - * will be done in sync with GPU commands. For TensorTypes::eHost it will - * only map the data into host memory which will happen during preEval before - * the recorded commands are dispatched. + * local CPU memory. For kp::Tensor::TensorTypes::eDevice it will use a record + * operation for the memory to be syncd into GPU memory which means that the + * operation will be done in sync with GPU commands. For + * kp::Tensor::TensorTypes::eHost it will only map the data into host memory + * which will happen during preEval before the recorded commands are dispatched. */ class OpTensorSyncLocal : public OpBase { @@ -23,11 +23,11 @@ class OpTensorSyncLocal : public OpBase /** * Default constructor with parameters that provides the core vulkan * resources and the tensors that will be used in the operation. The tensors - * provided cannot be of type TensorTypes::eStorage. + * provided cannot be of type kp::Tensor::TensorTypes::eStorage. * * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncLocal(const std::vector>& tensors); + OpTensorSyncLocal(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be @@ -60,7 +60,7 @@ class OpTensorSyncLocal : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::vector> mTensors; + std::vector> mTensors; }; } // End namespace kp