assorted fixes and performance improvements #134

Open · wants to merge 6 commits into master

2 changes: 1 addition & 1 deletion src/celt_lpc.c
@@ -204,7 +204,7 @@ int _celt_autocorr(
int n)
{
opus_val32 d;
- int i, k;
+ int i = 0, k = 0;

@e00E e00E Aug 19, 2020

I don't doubt that the static analyzer complains, but these initializations are not required: both variables are set to 0 before they are used either way. There is a general argument for always initializing everything explicitly, but this project seems to prefer to initialize only when needed, so I feel this change is inconsistent with the project style.

Author

I think a compromise between initializing everything and being as terse as possible is to initialize when a static analyzer highlights a potential problem due to a series of branches, however unlikely that problem may be.

@e00E e00E Aug 19, 2020

I do believe that these are always initialized (and cannot possibly be uninitialized); the static analyzer is just not good enough to figure it out. On the other hand, initializing has no drawback except the style difference mentioned above, and it is safer against human error. Probably another thing for a maintainer to decide.

int fastN=n-lag;
int shift;
const opus_val16 *xptr;
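A hypothetical illustration of the pattern discussed above (not code from celt_lpc.c): every path that reads the variable assigns it first, yet an analyzer that does not correlate the two branch conditions may still report it as possibly uninitialized.

```c
#include <stdio.h>

/* scale() never reads `shift` without writing it first, but a checker that
 * treats the two `if` conditions independently can still flag it. */
static int scale(int x, int use_shift)
{
    int shift;              /* typical "may be used uninitialized" report */
    if (use_shift)
        shift = 2;
    if (use_shift)
        return x << shift;  /* only reached after the assignment above */
    return x;
}

int main(void)
{
    printf("%d %d\n", scale(3, 1), scale(3, 0));
    return 0;
}
```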
5 changes: 5 additions & 0 deletions src/denoise.c
@@ -29,6 +29,10 @@
#include "config.h"
#endif

+ #ifndef M_PI
+ #define M_PI (3.14159265358979323846)
+ #endif

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
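For context on the guard above: M_PI is not required by the ISO C standard; it comes from POSIX's `<math.h>` (and from `_USE_MATH_DEFINES` on MSVC), so defining it behind `#ifndef` is the usual portable fallback. A minimal sketch of the same pattern:

```c
#include <math.h>
#include <stdio.h>

/* Fallback for compilers whose <math.h> does not provide M_PI. */
#ifndef M_PI
#define M_PI (3.14159265358979323846)
#endif

int main(void)
{
    printf("cos(pi) = %f\n", cos(M_PI));
    return 0;
}
```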
@@ -277,6 +281,7 @@ DenoiseState *rnnoise_create(RNNModel *model) {
}

void rnnoise_destroy(DenoiseState *st) {
+ if (common.init) opus_fft_free(common.kfft, 0);
free(st->rnn.vad_gru_state);
free(st->rnn.noise_gru_state);
free(st->rnn.denoise_gru_state);
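In denoise.c, `common` is the file-scope struct holding lazily initialized shared state, including the kiss FFT configuration, so this addition releases that FFT state when a DenoiseState is torn down (guarded by `common.init` so nothing is freed if it was never created). A minimal usage sketch, assuming the public rnnoise API (rnnoise_create, rnnoise_process_frame on 480-sample frames, rnnoise_destroy):

```c
#include <stdio.h>
#include "rnnoise.h"

#define FRAME_SIZE 480  /* one 10 ms frame at 48 kHz */

int main(void)
{
    DenoiseState *st = rnnoise_create(NULL);  /* NULL selects the built-in model */
    float frame[FRAME_SIZE] = {0};
    float vad = rnnoise_process_frame(st, frame, frame);  /* in-place denoise */
    printf("voice probability: %f\n", vad);
    rnnoise_destroy(st);  /* with this patch, also frees the shared FFT state */
    return 0;
}
```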
2 changes: 1 addition & 1 deletion src/kiss_fft.c
@@ -447,7 +447,7 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
if (st) {
opus_int16 *bitrev;
kiss_twiddle_cpx *twiddles;
-
+ memset(st, 0, sizeof(kiss_fft_state));
st->nfft=nfft;
#ifdef FIXED_POINT
st->scale_shift = celt_ilog2(st->nfft);
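The added memset guarantees that any kiss_fft_state fields that are only written on some configuration paths start out zeroed rather than holding whatever the allocator left behind. A generic sketch of the pattern (hypothetical struct, not the actual kiss_fft allocator):

```c
#include <stdlib.h>
#include <string.h>

struct fft_state { int nfft; int scale_shift; void *twiddles; };

static struct fft_state *fft_state_alloc(int nfft)
{
    struct fft_state *st = malloc(sizeof(*st));
    if (!st) return NULL;
    memset(st, 0, sizeof(*st));  /* equivalent to allocating with calloc(1, ...) */
    st->nfft = nfft;             /* fields not set here remain zero */
    return st;
}

int main(void)
{
    struct fft_state *st = fft_state_alloc(480);
    free(st);
    return 0;
}
```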
119 changes: 67 additions & 52 deletions src/rnn.c
@@ -76,33 +76,39 @@ static OPUS_INLINE float relu(float x)
return x < 0 ? 0 : x;
}

+ static void faxpy(float *restrict a, const rnn_weight *restrict b, int k, float u)
+ {
+    if (u == 0.0) return;
+    for (int idx = 0; idx < k; idx++)
+       a[idx] += b[idx] * u;
+ }

void compute_dense(const DenseLayer *layer, float *output, const float *input)
{
int i, j;
int N, M;
int stride;
M = layer->nb_inputs;
N = layer->nb_neurons;
stride = N;
- for (i=0;i<N;i++)
- {
-    /* Compute update gate. */
-    float sum = layer->bias[i];
-    for (j=0;j<M;j++)
-       sum += layer->input_weights[j*stride + i]*input[j];
-    output[i] = WEIGHTS_SCALE*sum;
- }
- if (layer->activation == ACTIVATION_SIGMOID) {
+ const rnn_weight *ip = layer->input_weights;
+ /* Compute update gate. */
+ for(i = 0; i < N; i++)
+    output[i] = layer->bias[i];
+ for (j=0;j<M;j++,ip+=N)
+    faxpy(output, ip, N, input[j]);
+ switch (layer->activation) {
+ case ACTIVATION_SIGMOID:
for (i=0;i<N;i++)
-    output[i] = sigmoid_approx(output[i]);
- } else if (layer->activation == ACTIVATION_TANH) {
+    output[i] = sigmoid_approx(WEIGHTS_SCALE * output[i]);
+    break;
+ case ACTIVATION_TANH:
for (i=0;i<N;i++)
-    output[i] = tansig_approx(output[i]);
- } else if (layer->activation == ACTIVATION_RELU) {
+    output[i] = tansig_approx(WEIGHTS_SCALE * output[i]);
+    break;
+ default:
+ case ACTIVATION_RELU:
for (i=0;i<N;i++)
-    output[i] = relu(output[i]);
- } else {
-    *(int*)0=0;
+    output[i] = relu(WEIGHTS_SCALE * output[i]);
+    break;
}
}
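The restructured compute_dense computes the same y = W·x as before; the difference is access order. The old loop took one strided dot product per output neuron, while the new code accumulates one contiguous block of N weights per input via faxpy, which is friendlier to the cache and to auto-vectorization, and the u == 0 early-out in faxpy skips whole columns for zero inputs. A small self-contained sketch (hypothetical sizes, plain float weights, no WEIGHTS_SCALE) showing the two orderings agree:

```c
/* Row-order dot products vs. column-order axpy accumulation give the same
 * y = W*x (hypothetical example; plain float weights, no scaling). */
#include <stdio.h>

#define M 3  /* inputs  */
#define N 2  /* outputs */

int main(void)
{
    /* Weights stored as in rnnoise: w[j*N + i] connects input j to neuron i. */
    const float w[M*N] = {1, 2, 3, 4, 5, 6};
    const float x[M]   = {1, -1, 2};
    float y_row[N], y_col[N];

    /* Original ordering: one strided dot product per output neuron. */
    for (int i = 0; i < N; i++) {
        float sum = 0;
        for (int j = 0; j < M; j++)
            sum += w[j*N + i] * x[j];
        y_row[i] = sum;
    }

    /* Restructured ordering: accumulate one contiguous column per input. */
    for (int i = 0; i < N; i++) y_col[i] = 0;
    for (int j = 0; j < M; j++)
        for (int i = 0; i < N; i++)
            y_col[i] += w[j*N + i] * x[j];

    for (int i = 0; i < N; i++)
        printf("%f %f\n", y_row[i], y_col[i]);  /* pairs should match */
    return 0;
}
```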

@@ -117,42 +123,49 @@ void compute_gru(const GRULayer *gru, float *state, const float *input)
M = gru->nb_inputs;
N = gru->nb_neurons;
stride = 3*N;
- for (i=0;i<N;i++)
- {
-    /* Compute update gate. */
-    float sum = gru->bias[i];
-    for (j=0;j<M;j++)
-       sum += gru->input_weights[j*stride + i]*input[j];
-    for (j=0;j<N;j++)
-       sum += gru->recurrent_weights[j*stride + i]*state[j];
-    z[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
- }
- for (i=0;i<N;i++)
- {
-    /* Compute reset gate. */
-    float sum = gru->bias[N + i];
-    for (j=0;j<M;j++)
-       sum += gru->input_weights[N + j*stride + i]*input[j];
-    for (j=0;j<N;j++)
-       sum += gru->recurrent_weights[N + j*stride + i]*state[j];
-    r[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
- }
- for (i=0;i<N;i++)
- {
-    /* Compute output. */
-    float sum = gru->bias[2*N + i];
-    for (j=0;j<M;j++)
-       sum += gru->input_weights[2*N + j*stride + i]*input[j];
-    for (j=0;j<N;j++)
-       sum += gru->recurrent_weights[2*N + j*stride + i]*state[j]*r[j];
-    if (gru->activation == ACTIVATION_SIGMOID) sum = sigmoid_approx(WEIGHTS_SCALE*sum);
-    else if (gru->activation == ACTIVATION_TANH) sum = tansig_approx(WEIGHTS_SCALE*sum);
-    else if (gru->activation == ACTIVATION_RELU) sum = relu(WEIGHTS_SCALE*sum);
-    else *(int*)0=0;
-    h[i] = z[i]*state[i] + (1-z[i])*sum;
+ const rnn_weight *ip = gru->input_weights;
+ const rnn_weight *rp = gru->recurrent_weights;
+ /* Compute update gate. */
+ for(i = 0; i < N; i++)
+    z[i] = gru->bias[i];
+ for (j=0;j<M;j++,ip+=stride)
+    faxpy(z, ip, N, input[j]);
+ for (j=0;j<N;j++,rp+=stride)
+    faxpy(z, rp, N, state[j]);
+ for(i = 0; i < N; i++)
+    z[i] = sigmoid_approx(WEIGHTS_SCALE*z[i]);
+ /* Compute reset gate. */
+ for(i = 0; i < N; i++)
+    r[i] = gru->bias[N+i];
+ ip = gru->input_weights + N;
+ rp = gru->recurrent_weights + N;
+ for (j=0;j<M;j++,ip+=stride)
+    faxpy(r, ip, N, input[j]);
+ for (j=0;j<N;j++,rp+=stride)
+    faxpy(r, rp, N, state[j]);
+ for(i = 0; i < N; i++)
+    r[i] = sigmoid_approx(WEIGHTS_SCALE*r[i]);
+
+ /* Compute output. */
+ for(i = 0; i < N; i++)
+    h[i] = gru->bias[2*N+i];
+ ip = gru->input_weights + 2*N;
+ rp = gru->recurrent_weights + 2*N;
+ for (j=0;j<M;j++,ip+=stride)
+    faxpy(h, ip, N, input[j]);
+ for (j=0;j<N;j++,rp+=stride)
+    faxpy(h, rp, N, r[j]*state[j]);
+ for (i=0;i<N;i++) {
+    switch (gru->activation) {
+    case ACTIVATION_SIGMOID: h[i] = sigmoid_approx(WEIGHTS_SCALE*h[i]);break;
+    case ACTIVATION_TANH: h[i] = tansig_approx(WEIGHTS_SCALE*h[i]); break;
+    default:
+    case ACTIVATION_RELU: h[i] = relu(WEIGHTS_SCALE*h[i]); break;
+    }
+    h[i] = z[i]*state[i] + (1-z[i])*h[i];
}
for (i=0;i<N;i++)
- state[i] = h[i];
+ state[i] = h[i];
}

#define INPUT_SIZE 42
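Both the old and the new compute_gru implement the same standard GRU recurrence; the rewrite only changes the order in which the sums are accumulated (faxpy over contiguous weight blocks instead of per-neuron strided dot products). Restated as equations, with x the input, s the previous state, W/U/b the input weights, recurrent weights and biases of each gate, s_w = WEIGHTS_SCALE, and phi the layer activation:

```latex
\begin{aligned}
z &= \sigma\big(s_w\,(b_z + W_z x + U_z s)\big) \\
r &= \sigma\big(s_w\,(b_r + W_r x + U_r s)\big) \\
\tilde h &= \phi\big(s_w\,(b_h + W_h x + U_h (r \odot s))\big) \\
s' &= z \odot s + (1 - z) \odot \tilde h
\end{aligned}
```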
@@ -162,6 +175,8 @@ void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input) {
float dense_out[MAX_NEURONS];
float noise_input[MAX_NEURONS*3];
float denoise_input[MAX_NEURONS*3];

+ memset(dense_out, 0, sizeof(dense_out));
compute_dense(rnn->model->input_dense, dense_out, input);
compute_gru(rnn->model->vad_gru, rnn->vad_gru_state, dense_out);
compute_dense(rnn->model->vad_output, vad, rnn->vad_gru_state);
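dense_out is declared with MAX_NEURONS entries but compute_dense only writes the first layer->nb_neurons of them, so the memset presumably exists to keep the unwritten tail defined (and to quiet analyzers) if the full buffer is ever read. A generic sketch of that pattern (hypothetical sizes, not the rnnoise buffers):

```c
/* Zero a fixed-size scratch buffer before a callee fills only a prefix of it,
 * so reads of the full buffer are always defined. */
#include <stdio.h>
#include <string.h>

#define MAX_N 8

static void fill_prefix(float *out, int n)
{
    for (int i = 0; i < n; i++)
        out[i] = (float)(i + 1);
}

int main(void)
{
    float buf[MAX_N];
    memset(buf, 0, sizeof(buf));  /* tail stays 0 instead of garbage */
    fill_prefix(buf, 3);
    for (int i = 0; i < MAX_N; i++)
        printf("%g ", buf[i]);
    printf("\n");
    return 0;
}
```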
2 changes: 1 addition & 1 deletion src/rnn.h
@@ -66,4 +66,4 @@ void compute_gru(const GRULayer *gru, float *state, const float *input);

void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input);

- #endif /* _MLP_H_ */
+ #endif /* _RNN_H_ */
Contributor

I am not able to evaluate the rest of the changes, but applied this one to get it out of the way.