From 01d97c592b7813bde983092d2e9f253c9648c7cb Mon Sep 17 00:00:00 2001 From: anikocharyan Date: Mon, 27 May 2024 11:07:29 +0200 Subject: [PATCH] Added support for --compiled-grammar. --- CMakeLists.txt | 2 ++ README.md | 6 +++--- headers/Grammar.h | 4 +++- src/CMakeLists.txt | 4 +++- src/Configuration.cpp | 8 ++++---- src/Grammar.cpp | 25 ++++++++++++++++++++----- src/RecognitionClient.cpp | 21 ++++++++++++++++++--- 7 files changed, 53 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c601a6..a2ad2b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required(VERSION 3.14) project(cli_client VERSION 0.0.1) include(${CMAKE_BINARY_DIR}/conan_paths.cmake) +set(CMAKE_CXX_STANDARD 20) + option(USE_CXX11_ABI_0 "" ON) if (USE_CXX11_ABI_0) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") diff --git a/README.md b/README.md index f06f80e..8beb30a 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ cli_client ``` command like: ```shell -cli_client -a audiofile.wav -T generic -t my.token -l en-US --asr-version V1 -H us.speechcenter.verbio.com -s 16000 --client-id my-client-id --client-secret my-client-secret +cli_client -a audiofile.wav -T GENERIC -t my.token -l en-US --asr-version V1 -H us.speechcenter.verbio.com -s 16000 --client-id my-client-id --client-secret my-client-secret ``` Which will give an output along these lines: @@ -269,9 +269,9 @@ There are three options available to provide a grammar: - The inline grammar option expects a grammar passed inline as a string. - The grammar URI option expects a URI, either pointing to a built-in grammar or to a grammar that is being hosted externally. -- The compiled grammar expects a filename of the compiled grammar binary. +- The compiled grammar expects a filename (a .tar.xz file) of the previously compiled grammar. -> **THIS FEATURE IS STILL IN DEVELOPMENT, PLEASE ONLY USE THE GRAMMAR URI OPTION WITH BUILTIN GRAMMARS, OR AN ERROR WILL BE GIVEN.** +> **THE INLINE GRAMMAR OPTION IS NOT IMPLEMENTED YET.** #### Language diff --git a/headers/Grammar.h b/headers/Grammar.h index 5e68080..cd893ea 100644 --- a/headers/Grammar.h +++ b/headers/Grammar.h @@ -26,10 +26,12 @@ class Grammar { std::vector getCompiledBytes() const; private: + void readCompiledGrammar(); + GrammarType type; std::string content; std::vector compiledBytes; }; -#endif//SPEECHCENTER_GRAMMAR_H +#endif //SPEECHCENTER_GRAMMAR_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e6bccf2..ab01877 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,7 +16,9 @@ target_link_libraries(speech-center-client PUBLIC SndFile::sndfile cpr::cpr jwt-cpp::jwt-cpp - nlohmann_json::nlohmann_json) + nlohmann_json::nlohmann_json + stdc++fs +) add_executable(cli_client main.cpp) diff --git a/src/Configuration.cpp b/src/Configuration.cpp index 77038c2..140c940 100644 --- a/src/Configuration.cpp +++ b/src/Configuration.cpp @@ -20,11 +20,11 @@ void Configuration::parse(int argc, char **argv) { cxxopts::Options options(argv[0], "Verbio Technlogies S.L. - Speech Center client example"); options.set_width(180).allow_unrecognised_options().add_options() ("a,audio", - "Path to a .wav audio in 8kHz or 16kHz sampling rate and PCM16 encoding to use for the recognition", + "Path to a .wav audio in 8kHz or 16kHz sampling rate and PCM16 encoding to use for the recognition.", cxxopts::value(audioPath), "file") - ("I,inline-grammar", "ABNF Grammar to use for the recognition passed as a string", cxxopts::value(grammarInline), "string") - ("G,grammar-uri", "Grammar URI to use for the recognition (builtin or externally served)", cxxopts::value(grammarUri), "uri") - ("C,compiled-grammar", "Path to the compiled grammar file to use for the recognition", cxxopts::value(grammarCompiled), "file") + ("I,inline-grammar", "ABNF Grammar to use for the recognition passed as a string.", cxxopts::value(grammarInline), "string") + ("G,grammar-uri", "Grammar URI to use for the recognition (builtin or externally served).", cxxopts::value(grammarUri), "uri") + ("C,compiled-grammar", "Path to the compiled grammar file (a .tar.xz file) to use for the recognition.", cxxopts::value(grammarCompiled), "file") ("T,topic", "Topic to use for the recognition when a grammar is not provided. Must be GENERIC | BANKING | TELCO | INSURANCE", cxxopts::value(topic)) diff --git a/src/Grammar.cpp b/src/Grammar.cpp index a288451..72edf09 100644 --- a/src/Grammar.cpp +++ b/src/Grammar.cpp @@ -1,15 +1,13 @@ #include "Grammar.h" + #include +#include Grammar::Grammar() : type{NONE}, content{} {} Grammar::Grammar(const GrammarType type, const std::string content) : type(type), content(content) { if (type == COMPILED) { - std::ifstream input(content, std::ios::binary); - compiledBytes = std::vector ( - (std::istreambuf_iterator(input)), - (std::istreambuf_iterator())); - input.close(); + readCompiledGrammar(); } } @@ -25,4 +23,21 @@ std::string Grammar::getContent() const { std::vector Grammar::getCompiledBytes() const { return compiledBytes; +} + +void Grammar::readCompiledGrammar() { + if(!std::filesystem::exists(content)) { + throw std::invalid_argument("Compiled grammar file '" + content + "' does not exist."); + } + const auto kXzExtension = std::filesystem::path(content).extension(); + const auto kFileNameWithoutXzExtension = std::filesystem::path(content).stem(); + const auto kTarExtension = std::filesystem::path(kFileNameWithoutXzExtension).extension(); + if(kXzExtension != ".xz" || kTarExtension != ".tar") { + throw std::invalid_argument("Compiled grammar file '" + content + "' extension is not .tar.xz."); + } + std::ifstream input(content, std::ios::binary); + compiledBytes = std::vector ( + (std::istreambuf_iterator(input)), + (std::istreambuf_iterator())); + input.close(); } \ No newline at end of file diff --git a/src/RecognitionClient.cpp b/src/RecognitionClient.cpp index 27fde74..00a6815 100644 --- a/src/RecognitionClient.cpp +++ b/src/RecognitionClient.cpp @@ -18,6 +18,22 @@ using namespace speechcenter::recognizer::v1; typedef RecognitionStreamingRequest Request; typedef RecognitionStreamingResponse Response; +namespace { + + std::string buildLogString(Request request) { + if (request.config().has_resource() && + request.config().resource().has_grammar() && + request.config().resource().grammar().has_compiled_grammar()) { + GrammarResource* resource = request.mutable_config()->mutable_resource()->mutable_grammar(); + std::string str = "Compiled Grammar"; + std::vector bytes(str.begin(), str.end()); + resource->set_compiled_grammar(bytes.data(), bytes.size()); + } + return request.DebugString(); + } + +} + void RecognitionClient::write( std::shared_ptr> @@ -25,7 +41,7 @@ void RecognitionClient::write( INFO("Writing to stream..."); Request recognitionConfig = buildRecognitionConfig(); - INFO("Sending config: \n{} ", recognitionConfig.DebugString()); + INFO("Sending config: \n{} ", buildLogString(recognitionConfig)); bool streamFail = !stream->Write(recognitionConfig); if (streamFail) { auto status = stream->Finish(); @@ -308,5 +324,4 @@ RecognitionConfig_AsrVersion RecognitionClient::buildAsrVersion() { } return topicIter->second; -} - +} \ No newline at end of file