-
Notifications
You must be signed in to change notification settings - Fork 3
/
lyra_decoder.h
174 lines (148 loc) · 6.79 KB
/
lyra_decoder.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LYRA_CODEC_LYRA_DECODER_H_
#define LYRA_CODEC_LYRA_DECODER_H_
#include <cstdint>
#include <memory>
#include <optional>
#include <vector>
#include "absl/types/span.h"
#include "buffered_filter_interface.h"
#include "feature_estimator_interface.h"
#include "generative_model_interface.h"
#include "include/ghc/filesystem.hpp"
#include "lyra_decoder_interface.h"
#include "noise_estimator_interface.h"
#include "vector_quantizer_interface.h"
#include "model_buffer/model_buffer_interface.h"
namespace chromemedia {
namespace codec {
/// Lyra decoder class.
///
/// This class unpacks the bit stream into features and uses a generative model
/// to decode them into audio samples.
class LyraDecoder : public LyraDecoderInterface {
public:
/// Static method to create a LyraDecoder.
///
/// @param sample_rate_hz Desired sample rate in Hertz. The supported sample
/// rates are 8000, 16000, 32000 and 48000.
/// @param num_channels Desired number of channels. Currently only 1 is
/// supported.
/// @param model_path Path to the model weights. The identifier in the
/// lyra_config.binarypb has to coincide with the
/// |kVersionMinor| constant in lyra_config.cc.
/// @return A unique_ptr to a |LyraDecoder| if all desired params are
/// supported. Else it returns a nullptr.
static std::unique_ptr<LyraDecoder> Create(
int sample_rate_hz, int num_channels,
const ghc::filesystem::path& model_path);
static std::unique_ptr<LyraDecoder> Create(
int sample_rate_hz, int num_channels,
const ModelBufferInterface& model_buffer);
/// Parses a packet and prepares to decode samples from the payload.
///
/// @param encoded Encoded packet as a span of bytes.
/// @return True if the provided packet is a valid Lyra packet.
bool SetEncodedPacket(absl::Span<const uint8_t> encoded) override;
/// Decodes samples.
///
/// If more samples are requested for decoding than are available from the
/// payloads set by |SetEncodedPacket|, this will generate samples in packet
/// loss mode. Packet loss mode will first attempt to conceal the lost packets
/// and then transition to comfort noise.
///
/// @param num_samples Number of samples to decode.
///
/// @return Vector of int16-formatted samples, or nullopt on failure.
std::optional<std::vector<int16_t>> DecodeSamples(int num_samples) override;
/// Getter for the sample rate in Hertz.
///
/// @return Sample rate in Hertz.
int sample_rate_hz() const override;
/// Getter for the number of channels.
///
/// @return Number of channels.
int num_channels() const override;
/// Getter for the frame rate.
///
/// @return Frame rate.
int frame_rate() const override;
/// Checks if the decoder is in comfort noise generation mode.
///
/// @return True if the decoder is in comfort noise generation mode.
bool is_comfort_noise() const override;
private:
// Tracks the direction we are moving along |fade_progress_|.
enum FadeDirection {
kFadeToCNG = 1,
kFadeFromCNG = -1,
};
LyraDecoder() = delete;
LyraDecoder(std::unique_ptr<GenerativeModelInterface> generative_model,
std::unique_ptr<GenerativeModelInterface> comfort_noise_generator,
std::unique_ptr<VectorQuantizerInterface> vector_quantizer,
std::unique_ptr<NoiseEstimatorInterface> noise_estimator,
std::unique_ptr<FeatureEstimatorInterface> feature_estimator,
std::unique_ptr<BufferedFilterInterface> resampler,
int external_sample_rate_hz, int num_channels);
// Runs the while loop for generating samples at the internal sample rate.
std::optional<std::vector<int16_t>> DecodeSamplesInternal(
int internal_num_samples_to_generate);
// Overlaps hops using a cos^2 window.
// Returns true on success, false on failure.
bool MaybeOverlapAndInsert(FadeDirection fade_direction, int fade_progress,
const std::vector<int16_t>& generative_model_hop,
const std::vector<int16_t>& comfort_noise_hop,
std::vector<int16_t>& result);
// Runs the generative model and adds estimated features if needed.
std::optional<std::vector<int16_t>> RunGenerativeModel(int num_samples);
// Runs the comfort noise generator and adds estimated features if needed.
std::optional<std::vector<int16_t>> RunComfortNoiseGenerator(int num_samples);
// Generates time domain samples from conditioning features.
std::unique_ptr<GenerativeModelInterface> generative_model_;
// Generates comfort noise from background noise estimates.
std::unique_ptr<GenerativeModelInterface> comfort_noise_generator_;
// Used to get the conditioning features from the bit-stream.
std::unique_ptr<VectorQuantizerInterface> vector_quantizer_;
// Estimates background noise for conditioning to the
// |comfort_noise_generator_|.
std::unique_ptr<NoiseEstimatorInterface> noise_estimator_;
// Used as conditioning for the |generative_model_| when more samples were
// requested than contained in received packets.
std::unique_ptr<FeatureEstimatorInterface> feature_estimator_;
// Resamples from the generative model sample rate to the external sampling
// rate.
std::unique_ptr<BufferedFilterInterface> resampler_;
// The packet loss state is described by the following three variables:
// Ranges from [-num_samples_per_packet + 1, concealment_length].
// If less than zero, abs(|concealment_progress_|) indicates how many
// concealment samples until we will begin playing out a received packet.
// Otherwise tracks samples since we last played out a received packet.
int concealment_progress_;
// Ranges from [0, fade_duration_samples_]. 0 indicates we are only generating
// model output. |fade_duration_samples| indicates we are only generating
// comfort noise. Values in between indicate a fade is in progress.
int fade_progress_;
// Indicates if we are incrementing or decrementing |fade_progress|.
FadeDirection fade_direction_;
const int external_sample_rate_hz_;
const int num_channels_;
friend class LyraDecoderPeer;
};
} // namespace codec
} // namespace chromemedia
#endif // LYRA_CODEC_LYRA_DECODER_H_