You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lpcnetfreedv/lpcnetfreedv-test.patch

1487 lines
47 KiB

diff --git a/.travis.yml b/.travis.yml
index fb795aa..e92c398 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,7 +29,7 @@ script:
- cd src && sox ../../wav/wia.wav -t raw -r 16000 - | ./lpcnet_enc -s | ./lpcnet_dec -s > /dev/null
# some LPCNet ctests
- ls -l
- - cd $BUILDDIR && ctest
+ - cd $BUILDDIR && ctest --output-on-failure
# Re-build codec2 with LPCNet and test FreeDV 2020 support
- cd $CODEC2DIR/build_linux
- make clean
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 680f52c..1d5b623 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ project(LPCNet C)
option(DISABLE_CPU_OPTIMIZATION "Disable CPU optimization discovery." OFF)
option(AVX2 "Enable AVX2 CPU optimizations." OFF)
option(AVX "Enable AVX CPU optimizations." OFF)
+option(SSE "Enable SSE CPU optimizations." OFF)
option(NEON "Enable NEON CPU optimizations for RPi." OFF)
include(GNUInstallDirs)
@@ -19,6 +20,11 @@ mark_as_advanced(CLEAR
CMAKE_INSTALL_LIBDIR
)
+# Build universal ARM64 and x86_64 binaries on Mac.
+if(BUILD_OSX_UNIVERSAL)
+set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64")
+endif(BUILD_OSX_UNIVERSAL)
+
#
# Prevent in-source builds
# If an in-source build is attempted, you will still need to clean up a few
@@ -43,15 +49,41 @@ set(LPCNET_VERSION_MINOR 2)
set(LPCNET_VERSION_PATCH FALSE)
set(LPCNET_VERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}")
# Patch level version bumps should not change API/ABI.
-set(SOVERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}")
+set(LPCNET_SOVERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}")
if(LPCNET_VERSION_PATCH)
set(LPCNET_VERSION "${LPCNET_VERSION}.${LPCNET_VERSION_PATCH}")
endif()
message(STATUS "LPCNet version: ${LPCNET_VERSION}")
+# Find the git hash if this is a working copy and pass it to the C code as
+# GIT_HASH. "None": no .git here; "Unknown": git missing or "git describe" failed.
+if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
+    find_package(Git QUIET)
+    if(Git_FOUND)
+        execute_process(
+            COMMAND "${GIT_EXECUTABLE}" describe --always HEAD
+            WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+            RESULT_VARIABLE res OUTPUT_VARIABLE FREEDV_HASH
+            ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+        if(NOT res EQUAL 0 OR "${FREEDV_HASH}" STREQUAL "")
+            set(FREEDV_HASH "Unknown")
+        endif()
+        message(STATUS "LPCNet current git hash: ${FREEDV_HASH}")
+        add_definitions(-DGIT_HASH="${FREEDV_HASH}")
+    else()
+        message(WARNING "Git not found. Can not determine current commit hash.")
+        add_definitions(-DGIT_HASH="Unknown")
+    endif()
+else()
+    add_definitions(-DGIT_HASH="None")
+endif()
+
# Set default flags
set(CMAKE_C_FLAGS "-Wall -W -Wextra -Wno-unused-function -O3 -g -I. -MD ${CMAKE_C_FLAGS} -DENABLE_ASSERTIONS")
+# Arch specific stuff here
+message(STATUS "Host system arch is: ${CMAKE_SYSTEM_PROCESSOR}")
+
# Detection of available CPU optimizations
if(NOT DISABLE_CPU_OPTIMIZATION)
if(UNIX AND NOT APPLE)
@@ -60,15 +92,25 @@ if(NOT DISABLE_CPU_OPTIMIZATION)
OUTPUT_VARIABLE AVX2)
execute_process(COMMAND grep -c "avx " /proc/cpuinfo
OUTPUT_VARIABLE AVX)
+ execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo
+ OUTPUT_VARIABLE SSE)
execute_process(COMMAND grep -c "neon" /proc/cpuinfo
OUTPUT_VARIABLE NEON)
elseif(APPLE)
- # Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
- message(STATUS "Looking for available CPU optimizations on an OSX system...")
- execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
- OUTPUT_VARIABLE AVX2)
- execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
- OUTPUT_VARIABLE AVX)
+ if(BUILD_OSX_UNIVERSAL)
+ # Presume AVX/AVX2 are enabled on the x86 side. The ARM side will auto-enable
+ # NEON optimizations by virtue of being aarch64.
+ set(AVX TRUE)
+ set(AVX2 TRUE)
+ set(SSE TRUE)
+ else()
+ # Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
+ message(STATUS "Looking for available CPU optimizations on an OSX system...")
+ execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
+ OUTPUT_VARIABLE AVX2)
+ execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
+ OUTPUT_VARIABLE AVX)
+ endif(BUILD_OSX_UNIVERSAL)
elseif(WIN32)
message(STATUS "No detection capability on Windows, assuming AVX is available.")
set(AVX TRUE)
@@ -85,9 +127,13 @@ elseif(${AVX} OR ${AVX} GREATER 0)
# AVX2 machines will also match on AVX
message(STATUS "avx processor flags found or enabled.")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
+elseif(${SSE} OR ${SSE} GREATER 0)
+# AVX and AVX2 machines will also match on SSE
+ message(STATUS "sse processor flags found or enabled.")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
endif()
-# RPi
+# RPi / ARM 32bit
if(${NEON} OR ${NEON} GREATER 0)
message(STATUS "neon processor flags found or enabled.")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=armv8-a -mtune=cortex-a53")
diff --git a/README.md b/README.md
index c446450..5b72d8c 100644
--- a/README.md
+++ b/README.md
@@ -25,14 +25,22 @@ LPCNet at 1733 bits/s using direct-split quantiser:
```
sox ../../wav/wia.wav -t raw -r 16000 - | ./lpcnet_enc -s | ./lpcnet_dec -s | aplay -f S16_LE -r 16000
```
-# CTests
+
+## Manually Selecting SIMD Technology
+
+CMake will select the fastest SIMD available (AVX/SSE/None); however, you can manually select one, e.g.:
+```
+cmake -DDISABLE_CPU_OPTIMIZATION=ON -DSSE=ON -DCODEC2_BUILD_DIR=~/codec2/build_linux ..
+```
+
+## CTests
```
$ cd ~/LPCNet/build_linux
$ ctest
```
-Note, due to precision/library issues several tests (1-3) will only pass on certain machines such as Ubuntu 16 and 18, Ubuntu 17 is known to fail.
+Note, due to precision/library issues several tests (1-3) will [only pass on some machines](https://github.com/drowe67/LPCNet/issues/17).
# Reading Further
diff --git a/src/700c_train.sh b/src/700c_train.sh
new file mode 100755
index 0000000..3be057e
--- /dev/null
+++ b/src/700c_train.sh
@@ -0,0 +1,73 @@
+#!/bin/bash -x
+# 700c_train.sh
+# David Rowe March 2020
+# Experiments in LPCNet decoding of Codec 2 700C
+
+PATH=$HOME/codec2/build_linux/src:$HOME/LPCNet/build_linux/src:$HOME/LPCNet/src:$PATH
+
+if [ "$#" -ne 1 ]; then
+ echo "usage: ./700c_train.sh datestamp"
+ echo " ./700c_train.sh 200404"
+ exit 0
+fi
+
+train1=dev-clean-8k
+test1=test-clean-8k
+test2=all_speech_subset_8k
+test3=all_8k
+datestamp=$1
+epochs=30
+log=${1}.txt
+train=${datestamp}_train
+
+# synth "c2sim arg for experiment" "experiment label" "filename"
+synth() {
+ test=$3
+ c2sim ~/Downloads/${test}.sw --rateKWov ${test}.f32 ${1}
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test}.f32 ${datestamp}_${test}_${2}.sw
+}
+
+# experiment "c2sim arg for experiment" "experiment label"
+experiment() {
+ echo "------------------------------------------------------------------------------"
+ echo "train starting" ${2}
+ echo "------------------------------------------------------------------------------"
+
+ c2sim ${train}.sw --ten_ms_centre ${train}_10ms.sw --rateKWov ${train}.f32 ${1}
+ sw2packedulaw --frame_size 80 ${train}_10ms.sw ${train}.f32 ${train}_10ms.pulaw
+
+ train_lpcnet.py ${train}.f32 ${train}_10ms.pulaw ${datestamp}_${2} --epochs ${epochs} --frame_size 80
+
+ dump_lpcnet.py ${datestamp}_${2}_${epochs}.h5
+ cp nnet_data.c src
+ make test_lpcnet
+
+ synth "${1}" "${2}" "${test1}"
+ synth "${1}" "${2}" "${test2}"
+ synth "${1}" "${2}" "${test3}"
+}
+
+rm -f $log
+
+(
+ date
+
+ # assemble some training speech
+ sox -r 8000 -c 1 ~/Downloads/${train1}.sw \
+ -t sw -r 8000 -c 1 ${train}.sw
+
+ # LPCNet with 10ms frames (similar to training data)
+ experiment "" "none"
+
+ # Codec 2 700C at 40ms frame rate (700 bits/s) from c2dec
+ c2enc 700C ~/Downloads/${test1}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test1}_dec4.f32
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test1}_dec4.f32 ${datestamp}_${test1}_40.sw
+ c2enc 700C ~/Downloads/${test2}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test2}_dec4.f32
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test2}_dec4.f32 ${datestamp}_${test2}_40.sw
+ c2enc 700C ~/Downloads/${test3}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test3}_dec4.f32
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test3}_dec4.f32 ${datestamp}_${test3}_40.sw
+
+ date
+) |& tee $log
+
+
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 41a78dc..0df4672 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -23,6 +23,8 @@ add_library(lpcnetfreedv SHARED ${lpcnet_freedv_srcs})
target_link_libraries(lpcnetfreedv codec2)
set_target_properties(lpcnetfreedv PROPERTIES
PUBLIC_HEADER lpcnet_freedv.h
+ VERSION ${LPCNET_VERSION}
+ SOVERSION ${LPCNET_SOVERSION}
)
target_include_directories(lpcnetfreedv INTERFACE
$<INSTALL_INTERFACE:include/lpcnet>
@@ -49,11 +51,11 @@ target_link_libraries(dump_data lpcnetfreedv m codec2)
add_executable(test_lpcnet test_lpcnet.c)
target_link_libraries(test_lpcnet lpcnetfreedv m codec2)
-if(AVX OR AVX2)
+if(SSE OR AVX OR AVX2 OR CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$")
add_executable(test_vec test_vec.c)
target_link_libraries(test_vec m)
else()
- message(WARNING "No AVX/AVX2 CPU flags identified, not building test_vec.")
+ message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.")
endif()
add_executable(quant_feat quant_feat.c)
@@ -98,6 +100,12 @@ target_link_libraries(idct lpcnetfreedv m codec2)
add_executable(nnet2f32 nnet2f32.c)
target_link_libraries(nnet2f32 lpcnetfreedv m)
+add_executable(sw2packedulaw sw2packedulaw.c)
+target_link_libraries(sw2packedulaw lpcnetfreedv m)
+
+add_executable(thash thash.c)
+target_link_libraries(thash lpcnetfreedv m)
+
install(TARGETS lpcnet_enc lpcnet_dec
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
diff --git a/src/codec2_pitch.c b/src/codec2_pitch.c
index a267785..55fb5bc 100644
--- a/src/codec2_pitch.c
+++ b/src/codec2_pitch.c
@@ -113,6 +113,7 @@ int codec2_pitch_est(CODEC2_PITCH *pitch, float Sn[], float *f0, float *voicing)
void codec2_pitch_destroy(CODEC2_PITCH *pitch)
{
+ free(pitch->fft_fwd_cfg);
nlp_destroy(pitch->nlp_states);
free(pitch->w);
free(pitch);
diff --git a/src/concat.sh b/src/concat.sh
old mode 100644
new mode 100755
index 8369117..d98ccda
--- a/src/concat.sh
+++ b/src/concat.sh
@@ -1,6 +1,8 @@
-# Place in 16k-LP7 from TSPSpeech.iso and run to concatenate wave files
-# into one headerless training file
-for i in */*.wav
+#!/bin/bash
+# Concatenate .wav files into one headerless .sw training file
+# usage: ./concat.sh concatfile.sw
+
+for i in `find . -name '*.wav'`
do
sox $i -r 16000 -c 1 -t sw -
-done > input.s16
+done > $1
diff --git a/src/dump_data.c b/src/dump_data.c
index cd936cf..4e8d3c4 100644
--- a/src/dump_data.c
+++ b/src/dump_data.c
@@ -453,6 +453,7 @@ int main(int argc, char **argv) {
assert(pitch_index < 2*PITCH_MAX_PERIOD);
assert(pitch_index >= 2*PITCH_MIN_PERIOD);
features[2*NB_BANDS] = 0.01*(pitch_index-200);
+ //fprintf(stderr, "count: %d [36] %f pitch_index: %d\n", count, features[36], pitch_index);
if (c2voicing_en) features[2*NB_BANDS+1] = voicing;
}
fwrite(features, sizeof(float), NB_FEATURES, ffeat);
diff --git a/ext_pitch.sh b/src/ext_pitch.sh
similarity index 100%
rename from ext_pitch.sh
rename to src/ext_pitch.sh
diff --git a/src/flac_to_wav.sh b/src/flac_to_wav.sh
new file mode 100755
index 0000000..8f8aa29
--- /dev/null
+++ b/src/flac_to_wav.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Convert all .flac files under this folder to 16 kHz mono .wav files
+# source: several GitHub repos
+
+find . -iname "*.flac" | wc
+# NUL-delimited names survive spaces; -nostdin keeps ffmpeg from consuming the file list
+find . -iname "*.flac" -print0 | while IFS= read -r -d '' flacfile
+do
+    ffmpeg -nostdin -y -f flac -i "$flacfile" -ab 64k -ac 1 -ar 16000 -f wav "${flacfile%.*}.wav"
+done
diff --git a/src/freq.c b/src/freq.c
index c88d071..dbe94d9 100644
--- a/src/freq.c
+++ b/src/freq.c
@@ -140,6 +140,13 @@ static void check_init() {
common.init = 1;
}
+void freq_close() {
+ if (common.init) {
+ opus_fft_free(common.kfft,0);
+ common.init = 0;
+ }
+}
+
void dct(float *out, const float *in) {
int i;
check_init();
diff --git a/src/freq.h b/src/freq.h
index 0316edd..314eabd 100644
--- a/src/freq.h
+++ b/src/freq.h
@@ -42,6 +42,7 @@
#define NB_BANDS 18
+void freq_close(void);
void compute_band_energy(float *bandE, const kiss_fft_cpx *X);
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P);
diff --git a/src/lpcnet.c b/src/lpcnet.c
index e117f1c..9f3f059 100644
--- a/src/lpcnet.c
+++ b/src/lpcnet.c
@@ -54,8 +54,10 @@ struct LPCNetState {
float old_lpc[FEATURES_DELAY][LPC_ORDER];
float old_gain[FEATURES_DELAY];
int frame_count;
+ float preemph;
float deemph_mem;
- FILE *ftest; /* used to dump states for automates tests */
+ int pitch_embedding;
+ FILE *ftest; /* used to dump states for automated tests */
};
@@ -118,6 +120,8 @@ LPCNetState *lpcnet_create()
lpcnet = (LPCNetState *)calloc(sizeof(LPCNetState), 1);
lpcnet->last_exc = 128;
lpcnet->ftest = NULL;
+ lpcnet->preemph = PREEMPH;
+ lpcnet->pitch_embedding = 1;
return lpcnet;
}
@@ -135,7 +139,15 @@ void lpcnet_open_test_file(LPCNetState *lpcnet, char file_name[]) {
}
}
-void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features, int N, int logmag)
+void lpcnet_set_preemph(LPCNetState *lpcnet, float preemph) {
+ lpcnet->preemph = preemph;
+}
+
+void lpcnet_set_pitch_embedding(LPCNetState *lpcnet, int val) {
+ lpcnet->pitch_embedding = val;
+}
+
+void lpcnet_synthesize(LPCNetState *lpcnet, short *output, float *features, int N, int mag)
{
static int count = 0;
int i;
@@ -149,13 +161,19 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features
static int start = 0; /*(LPC_ORDER+1*/;
/* FIXME: Do proper rounding once the Python code rounds properly. */
- pitch = (int)floor(.1 + 50*features[36]+100);
- assert(pitch >=0); assert(pitch <= 255);
- /* latest networks (using the codec 2 pitch estimator) are trained
- with pitch estimates between 40 and 255, but due to the pitch
- quantiser design and bit errors it's possible to get pitch
- values down to 32, which upsets the pitch embed matrix */
- if (pitch < 40) pitch = 40;
+ if (lpcnet->pitch_embedding) {
+ pitch = (int)floor(.1 + 50*features[36]+100);
+ //fprintf(stderr, "count: %d [36] %f pitch: %d\n", lpcnet->frame_count, features[36], pitch);
+ assert(pitch >=0); assert(pitch <= 255);
+ /* latest networks (using the codec 2 pitch estimator) are trained
+ with pitch estimates between 40 and 255, but due to the pitch
+ quantiser design and bit errors it's possible to get pitch
+ values down to 32, which upsets the pitch embed matrix */
+ if (pitch < 40) pitch = 40;
+ }
+ else {
+ pitch = 0;
+ }
pitch_gain = lpcnet->old_gain[FEATURES_DELAY-1];
memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
@@ -164,13 +182,30 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features
memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
- if (logmag) {
- float tmp[NB_BANDS];
+ switch (mag) {
+ case 0:
+ lpc_from_cepstrum(lpcnet->old_lpc[0], features);
+ break;
+ case 1:
+ {
+ float tmp[NB_BANDS];
for (i=0;i<NB_BANDS;i++) tmp[i] = pow(10.f, features[i]);
lpc_from_bands(lpcnet->old_lpc[0], tmp);
}
- else
- lpc_from_cepstrum(lpcnet->old_lpc[0], features);
+ break;
+ case 2:
+ for (i=0;i<LPC_ORDER;i++) {
+ lpcnet->old_lpc[0][i] = features[i+NB_BANDS];
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ /* We optionally use this part of feature vector to pass in LPCs,
+ * but we don't want any non zero values here hitting the
+ * frame rate network. TODO: better design */
+ RNN_CLEAR(&features[18], 18);
if (lpcnet->ftest) {
float pitch_f = pitch;
@@ -220,7 +255,7 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features
RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
lpcnet->last_sig[0] = pcm;
lpcnet->last_exc = exc;
- pcm += PREEMPH*lpcnet->deemph_mem;
+ pcm += lpcnet->preemph*lpcnet->deemph_mem;
lpcnet->deemph_mem = pcm;
if (pcm<-32767) pcm = -32767;
if (pcm>32767) pcm = 32767;
diff --git a/src/lpcnet.h b/src/lpcnet.h
index 70e849e..bd98a37 100644
--- a/src/lpcnet.h
+++ b/src/lpcnet.h
@@ -34,8 +34,10 @@
typedef struct LPCNetState LPCNetState;
LPCNetState *lpcnet_create();
void lpcnet_destroy(LPCNetState *lpcnet);
-void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features, int N, int logmag);
+void lpcnet_synthesize(LPCNetState *lpcnet, short *output, float *features, int N, int mag); /* mag: 0 cepstrum, 1 log magnitudes, 2 LPCs taken from features */
void lpcnet_open_test_file(LPCNetState *lpcnet, char file_name[]);
+void lpcnet_set_preemph(LPCNetState *lpcnet, float preemph);
+void lpcnet_set_pitch_embedding(LPCNetState *lpcnet, int val);
#endif
diff --git a/src/lpcnet.py b/src/lpcnet.py
index 010f478..960e8c8 100644
--- a/src/lpcnet.py
+++ b/src/lpcnet.py
@@ -36,7 +36,6 @@ import numpy as np
import h5py
import sys
-frame_size = 160
pcm_bits = 8
embed_size = 128
pcm_levels = 2**pcm_bits
@@ -113,7 +112,7 @@ class PCMInit(Initializer):
'seed': self.seed
}
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, use_gpu=True):
+def new_lpcnet_model(frame_size = 160, rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, use_gpu=True):
pcm = Input(shape=(None, 3))
feat = Input(shape=(None, nb_used_features))
pitch = Input(shape=(None, 1))
diff --git a/src/lpcnet_dump.c b/src/lpcnet_dump.c
index d8a8409..58f9c98 100644
--- a/src/lpcnet_dump.c
+++ b/src/lpcnet_dump.c
@@ -87,7 +87,8 @@ static DenoiseState *rnnoise_create() {
}
static void rnnoise_destroy(DenoiseState *st) {
- free(st);
+ freq_close();
+ free(st);
}
static short float2short(float x)
diff --git a/src/lpcnet_freedv.c b/src/lpcnet_freedv.c
index 823fcdc..fe154ea 100644
--- a/src/lpcnet_freedv.c
+++ b/src/lpcnet_freedv.c
@@ -80,3 +80,9 @@ void lpcnet_dec(LPCNetFreeDV *lf, char *frame, short* pcm)
int lpcnet_samples_per_frame(LPCNetFreeDV *lf) { return FRAME_SIZE*lf->q->dec; }
int lpcnet_bits_per_frame(LPCNetFreeDV *lf) { return lf->q->bits_per_frame; }
+
+static char git_hash[] = GIT_HASH;
+char *lpcnet_get_hash(void) {
+ return git_hash;
+}
+
diff --git a/src/lpcnet_freedv.h b/src/lpcnet_freedv.h
index 43c8298..874f7cc 100644
--- a/src/lpcnet_freedv.h
+++ b/src/lpcnet_freedv.h
@@ -8,6 +8,10 @@
#ifndef __LPCNET_FREEDV__
#define __LPCNET_FREEDV__
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
typedef struct LPCNetFreeDV LPCNetFreeDV;
LPCNetFreeDV* lpcnet_freedv_create(int direct_split);
@@ -16,5 +20,10 @@ void lpcnet_enc(LPCNetFreeDV *lf, short *pcm, char *frame);
void lpcnet_dec(LPCNetFreeDV *lf, char *frame, short* pcm);
int lpcnet_bits_per_frame(LPCNetFreeDV *lf);
int lpcnet_samples_per_frame(LPCNetFreeDV *lf);
+char *lpcnet_get_hash(void);
+
+#ifdef __cplusplus
+}
+#endif
#endif
diff --git a/src/nnet.c b/src/nnet.c
index 8ad4a26..1da7d70 100644
--- a/src/nnet.c
+++ b/src/nnet.c
@@ -43,7 +43,9 @@
#ifdef __AVX__
#include "vec_avx.h"
-#elif __ARM_NEON__
+#elif __SSE__
+#include "vec_sse.h"
+#elif __ARM_NEON__ || __aarch64__
#include "vec_neon.h"
#else
#warning Compiling without any vectorization. This code will be very slow
diff --git a/src/plot_lpc.m b/src/plot_lpc.m
new file mode 100644
index 0000000..3b814be
--- /dev/null
+++ b/src/plot_lpc.m
@@ -0,0 +1,50 @@
+% plot_lpc.m
+% David Rowe April 2020
+%
+% Visualise LPC spectra for 700C decoder experiments
+
+Fs = 8000; % speech sample rate
+Fsf = 100; % frame sample rate
+nb_features = 55;
+nb_rateK = 18; % number of rateK (log amplitude) features
+nb_lpc = 10; % number of LPCs
+
+function plot_against_time(v, st_sec, en_sec, Fs, leg='b')
+ st = Fs*st_sec; en = Fs*en_sec;
+ t = st_sec:1/Fs:en_sec;
+ plot(t,v(st+1:en+1),leg);
+endfunction
+
+function mesh_against_time(m, st_sec, en_sec, Fs)
+ st = Fs*st_sec; en = Fs*en_sec;
+ t = st_sec:1/Fs:en_sec;
+ mesh(m(st+1:en+1,:));
+endfunction
+
+function mesh_aks_against_time(aks, st_sec, en_sec, Fs)
+ st = Fs*st_sec; en = Fs*en_sec;
+ t = st_sec:1/Fs:en_sec;
+ aks = aks(st+1:en+1,:); A = [];
+ for f=1:length(aks)
+ A = [A freqz(1,[1 aks(f,:)],64)];
+ end
+ AdB = 20*log10(abs(A));
+ max(AdB(:))
+ mesh(AdB);
+endfunction
+
+# plots of speech (input), rateK vectors, LPC spectra
+
+features=load_f32("../build_linux/all_8k.f32", nb_features);
+rateK=features(:, 1:nb_rateK);
+aks = features(:, nb_rateK+1:nb_rateK+nb_lpc);
+fs=fopen("../build_linux/all_8k_10ms.sw","rb");
+s = fread(fs,Inf,"short");
+fclose(fs);
+
+st_sec=14; en_sec=16;
+
+figure(1); clf; plot_against_time(s, st_sec, en_sec, Fs, 'b')
+figure(2); clf; mesh_against_time(rateK, st_sec, en_sec, Fsf);
+figure(3); clf; mesh_aks_against_time(aks, st_sec, en_sec, Fsf);
+
diff --git a/src/plot_pulaw.py b/src/plot_pulaw.py
new file mode 100755
index 0000000..10d5656
--- /dev/null
+++ b/src/plot_pulaw.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python3
+# Utility to inspect packed ulaw samples from sw2packedulaw.c (or dump_data.c) before training
+
+import numpy as np
+import matplotlib.pyplot as plt
+import sys
+import ulaw
+import argparse
+
+parser = argparse.ArgumentParser(description='Plot LPCNet training packed ulaw samples')
+parser.add_argument('file1', help='pulaw file of packed ulaw samples')
+parser.add_argument('--file2', help='optional second packed ulaw file to compare')
+parser.add_argument('--nb_samples', type=int, default=-1, help='Optional number of samples to plot')
+args = parser.parse_args()
+
+data = np.fromfile(args.file1, dtype='uint8')
+nb_samples = args.nb_samples if args.nb_samples >= 0 else None  # default -1 means "no limit"; slicing with [:-1] would drop the last byte
+data = data[:nb_samples]
+
+sig = np.array(data[0::4], dtype='float')
+pred = np.array(data[1::4], dtype='float')
+in_exc = np.array(data[2::4], dtype='float')
+out_exc = np.array(data[3::4], dtype='float')
+
+print("exc var: %4.3e" % (np.var(ulaw.ulaw2lin(in_exc))))
+
+plt.figure(1)
+plt.subplot(211)
+plt.plot(ulaw.ulaw2lin(sig), label='sig')
+plt.ylim((-30000,30000))
+plt.legend()
+plt.subplot(212)
+plt.plot(ulaw.ulaw2lin(pred), label='pred')
+plt.ylim((-30000,30000))
+plt.legend()
+plt.show(block=False)
+
+plt.figure(2)
+plt.subplot(211)
+plt.plot(ulaw.ulaw2lin(in_exc), label='in_exc')
+if args.file2:
+ data2 = np.fromfile(args.file2, dtype='uint8')
+ data2 = data2[:nb_samples]
+ in_exc2 = np.array(data2[2::4], dtype='float')
+ plt.plot(ulaw.ulaw2lin(in_exc2), label='in_exc2')
+plt.ylim((-30000,30000))
+plt.legend()
+plt.subplot(212)
+plt.plot(ulaw.ulaw2lin(out_exc), label='out_exc')
+plt.ylim((-30000,30000))
+plt.legend()
+plt.show()
diff --git a/src/plot_train.py b/src/plot_train.py
index 910d7e9..7e2bc7b 100644
--- a/src/plot_train.py
+++ b/src/plot_train.py
@@ -3,11 +3,10 @@ import numpy as np
import sys
loss = np.loadtxt(sys.argv[1])
-delta_loss = (loss[1:,0]-loss[:-1,0])/loss[1:,0]
+delta_loss = (loss[1:]-loss[:-1])/loss[1:]
plt.figure(1)
-plt.plot(loss[:,0],'r')
-plt.plot(loss[:,1],'g')
+plt.plot(loss[:],'r')
plt.title('loss')
plt.show(block=False)
plt.figure(2)
diff --git a/src/plot_train.sh b/src/plot_train.sh
index 2a1fddf..3c86094 100755
--- a/src/plot_train.sh
+++ b/src/plot_train.sh
@@ -6,5 +6,5 @@
# plot graphs of loss and spares categorical accuracy to get a feel
# for progress while training
-grep loss $1 | sed -n 's/.*===\].*loss: \(.*\) - val_loss: \(.*\)/\1 \2/p' > loss.txt
-python3 plot_train.py loss.txt
+grep loss $1 | sed -n 's/.*===\].*step - loss: \(.*\)/\1/p' > loss.txt
+python3 ~/LPCNet/src/plot_train.py loss.txt
diff --git a/process.sh b/src/process.sh
similarity index 100%
rename from process.sh
rename to src/process.sh
diff --git a/src/sw2packedulaw.c b/src/sw2packedulaw.c
new file mode 100644
index 0000000..7724158
--- /dev/null
+++ b/src/sw2packedulaw.c
@@ -0,0 +1,188 @@
+/*
+ sw2packedulaw.c
+
+ Convert signed word samples to packed ulaw samples to drive LPCNet
+ training, this code is a cut/paste from dump_data.c with a few other
+ options.
+
+ By varying the LPC predictor coefficients we can try no predictor,
+ first order, and regular LPC.
+
+ 1. No prediction (WaveRNN I guess):
+ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32
+ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k_none.pulaw
+ $ ../src/plot_pulaw.py all_8k_none.pulaw
+
+ 2. First order predictor:
+ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32 --first
+ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k_first.pulaw
+
+ 3. LPC with ulaw Q in the loop and noise injection (standard LPCNet design):
+ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32 --lpc 10
+ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k.pulaw
+
+ 4. LPC with no Q in the loop or noise injection (linear):
+ $ ./src/sw2packedulaw --frame_size 80 --linear all_8k_10ms.sw all_8k.f32 all_8k_linear.pulaw
+
+ See plot_pulaw.py to inspect output .pulaw files
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "common.h"
+#include <math.h>
+#include "freq.h"
+#include "pitch.h"
+#include "arch.h"
+#include "celt_lpc.h"
+#include <assert.h>
+#include <getopt.h>
+
+#define NB_FEATURES 55
+#define CODEC2_LPC_ORDER 10
+
+typedef struct {
+ float lpc[LPC_ORDER];
+ float sig_mem[LPC_ORDER];
+ int exc_mem;
+} DenoiseState;
+
+void write_audio(DenoiseState *st, const short *pcm, float noise_std, FILE *file, int frame_size) {
+ int i;
+ unsigned char data[4*frame_size];
+ for (i=0;i<frame_size;i++) {
+ int noise;
+ float p=0;
+ float e;
+ int j;
+ for (j=0;j<LPC_ORDER;j++) p -= st->lpc[j]*st->sig_mem[j];
+ e = lin2ulaw(pcm[i] - p);
+ /* Signal. */
+ data[4*i] = lin2ulaw(st->sig_mem[0]);
+ /* Prediction. */
+ data[4*i+1] = lin2ulaw(p);
+ /* Excitation in. */
+ data[4*i+2] = st->exc_mem;
+ /* Excitation out. */
+ data[4*i+3] = e;
+ /* Simulate error on excitation. */
+ noise = (int)floor(.5 + noise_std*.707*(log_approx((float)rand()/RAND_MAX)-log_approx((float)rand()/RAND_MAX)));
+ e += noise;
+ e = IMIN(255, IMAX(0, e));
+
+ RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
+ st->sig_mem[0] = p + ulaw2lin(e);
+ st->exc_mem = e;
+ }
+ fwrite(data, 4*frame_size, 1, file);
+}
+
+/* takes ulaw out of predictor path, and no noise injection */
+void write_audio_linear(DenoiseState *st, const short *pcm, FILE *file, int frame_size) {
+ int i;
+ unsigned char data[4*frame_size];
+ for (i=0;i<frame_size;i++) {
+ float p=0;
+ float e;
+ int j;
+ for (j=0;j<LPC_ORDER;j++) p -= st->lpc[j]*st->sig_mem[j];
+ e = pcm[i] - p;
+ //fprintf(stderr,"pcm: %d p: %f e: %f\n", pcm[i], p, e);
+ /* Signal. */
+ data[4*i] = lin2ulaw(st->sig_mem[0]);
+ /* Prediction. */
+ data[4*i+1] = lin2ulaw(p);
+ /* Excitation in. */
+ data[4*i+2] = st->exc_mem;
+ /* Excitation out. */
+ data[4*i+3] = lin2ulaw(e);
+
+ RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
+ st->sig_mem[0] = pcm[i];
+ st->exc_mem = lin2ulaw(e);
+ }
+ fwrite(data, 4*frame_size, 1, file);
+}
+
+int main(int argc, char *argv[]) {
+    int linear = 0;
+    int frame_size = FRAME_SIZE;
+    /* predictor state: everything zeroed, excitation memory preset to 128 */
+    DenoiseState st;
+    memset(&st, 0, sizeof(DenoiseState));
+    st.exc_mem = 128;
+    /* options: --linear (-l) and --frame_size; three file names follow them */
+    int o = 0;
+    int opt_idx = 0;
+    while( o != -1 ) {
+        static struct option long_opts[] = {
+            {"linear", no_argument, 0, 'l'},
+            {"frame_size", required_argument, 0, 'f'},
+            {0, 0, 0, 0}
+        };
+        o = getopt_long(argc,argv,"l",long_opts,&opt_idx);
+        switch(o){
+        case 'f':
+            frame_size = atoi(optarg);
+            fprintf(stderr, "frame_size: %d\n", frame_size);
+            break;
+        case 'l':
+            linear = 1;
+            break;
+        case '?':
+            goto helpmsg;
+        }
+    }
+    /* frame_size sizes variable-length arrays, so it must be positive */
+    if (frame_size <= 0) {
+        fprintf(stderr, "frame_size must be a positive number of samples\n");
+        exit(1);
+    }
+    int dx = optind;
+    if ((argc - dx) < 3) {
+    helpmsg:
+        fprintf(stderr, "usage: sw2packedulaw [--linear] [--frame_size samples] Input.s16 FeatureFile.f32 Output.pulaw\n");
+        return 0;
+    }
+
+    FILE *fsw = fopen(argv[dx], "rb");
+    if (fsw == NULL) {
+        fprintf(stderr, "Can't open %s\n", argv[dx]);
+        exit(1);
+    }
+
+    FILE *ffeature = fopen(argv[dx+1], "rb");
+    if (ffeature == NULL) {
+        fprintf(stderr, "Can't open %s\n", argv[dx+1]);
+        exit(1);
+    }
+
+    FILE *fpackedpcm = fopen(argv[dx+2], "wb");
+    if (fpackedpcm == NULL) {
+        fprintf(stderr, "Can't open %s\n", argv[dx+2]);
+        exit(1);
+    }
+    /* one feature vector is consumed per frame_size speech samples */
+    short frame[frame_size];
+    while (fread(frame, sizeof(short), frame_size, fsw) == (unsigned)frame_size) {
+        float features[NB_FEATURES];
+        int ret = fread(features, sizeof(float), NB_FEATURES, ffeature);
+        if (ret != NB_FEATURES) {
+            fprintf(stderr, "feature file ended early!\n");
+            exit(1);
+        }
+        for(int i=0; i<CODEC2_LPC_ORDER; i++) {
+            st.lpc[i] = features[18+i];
+        }
+        if (linear)
+            write_audio_linear(&st, frame, fpackedpcm, frame_size);
+        else
+            write_audio(&st, frame, 0.5, fpackedpcm, frame_size);
+    }
+
+    fclose(fsw);
+    fclose(ffeature);
+    fclose(fpackedpcm);
+    return 0;
+}
+
diff --git a/src/test_lpcnet.c b/src/test_lpcnet.c
index 0a34729..e8c9907 100644
--- a/src/test_lpcnet.c
+++ b/src/test_lpcnet.c
@@ -36,26 +36,37 @@
int main(int argc, char **argv) {
FILE *fin, *fout;
LPCNetState *net;
- int logmag = 0;
-
+ int mag = 0;
+ int frame_size = FRAME_SIZE;
+
net = lpcnet_create();
int o = 0;
int opt_idx = 0;
while( o != -1 ) {
static struct option long_opts[] = {
- {"mag", no_argument, 0, 'i'},
- {"nnet", required_argument, 0, 'n'},
+ {"frame_size", required_argument, 0, 'f'},
{"logstates", required_argument, 0, 'l'},
- {0, 0, 0, 0}
+ {"mag", required_argument, 0, 'i'},
+ {"nnet", required_argument, 0, 'n'},
+ {"no_pitch_embedding", no_argument, 0, 'e'},
+ {"pre", required_argument, 0, 'p'},
+ {0, 0, 0, 0}
};
o = getopt_long(argc,argv,"ihn:l:",long_opts,&opt_idx);
switch(o){
+ case 'e':
+ lpcnet_set_pitch_embedding(net, 0);
+ break;
+ case 'f':
+ frame_size = atoi(optarg);
+ fprintf(stderr, "frame_size: %d\n", frame_size);
+ break;
case 'i':
- logmag = 1;
- fprintf(stderr, "logmag: %d\n", logmag);
+ mag = atoi(optarg);
+ fprintf(stderr, "mag: %d\n", mag);
break;
case 'l':
fprintf(stderr, "logstates file: %s\n", optarg);
@@ -65,6 +76,10 @@ int main(int argc, char **argv) {
fprintf(stderr, "loading nnet: %s\n", optarg);
nnet_read(optarg);
break;
+ case 'p':
+ if (atoi(optarg) == 0)
+ lpcnet_set_preemph(net, 0.0);
+ break;
case '?':
goto helpmsg;
break;
@@ -74,7 +89,9 @@ int main(int argc, char **argv) {
if ((argc - dx) < 2) {
helpmsg:
- fprintf(stderr, "usage: test_lpcnet [--mag] [--logstates statesfile] [--nnet lpcnet_xxx.f32] <features.f32> <output.pcm>\n");
+        fprintf(stderr, "usage: test_lpcnet [--mag 0|1|2] [--logstates statesfile] [--nnet lpcnet_xxx.f32]"
+                        " [--frame_size samples] [--pre 0|1] [--no_pitch_embedding] <features.f32> <output.s16>\n");
+        fprintf(stderr, "--mag 0-cepstrals, 1-logmag, 2-LPCs read from the feature vector\n");
return 0;
}
@@ -99,13 +116,12 @@ int main(int argc, char **argv) {
while (1) {
float in_features[NB_TOTAL_FEATURES];
float features[NB_FEATURES];
- short pcm[FRAME_SIZE];
+ short pcm[frame_size];
int nread = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
if (nread != NB_TOTAL_FEATURES) break;
RNN_COPY(features, in_features, NB_FEATURES);
- RNN_CLEAR(&features[18], 18);
- lpcnet_synthesize(net, pcm, features, FRAME_SIZE, logmag);
- fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
+ lpcnet_synthesize(net, pcm, features, frame_size, mag);
+ fwrite(pcm, sizeof(pcm[0]), frame_size, fout);
if (fout == stdout) fflush(stdout);
}
fclose(fin);
diff --git a/src/test_vec.c b/src/test_vec.c
index 09b51e7..efa617e 100644
--- a/src/test_vec.c
+++ b/src/test_vec.c
@@ -26,7 +26,10 @@ const char simd[]="AVX2";
#else
const char simd[]="AVX";
#endif
-#elif __ARM_NEON__
+#elif __SSE__
+#include "vec_sse.h"
+const char simd[]="SSE";
+#elif __ARM_NEON__ || __aarch64__
#include "vec_neon.h"
const char simd[]="NEON";
#else
diff --git a/src/thash.c b/src/thash.c
new file mode 100644
index 0000000..5b60f2e
--- /dev/null
+++ b/src/thash.c
@@ -0,0 +1,19 @@
+/*---------------------------------------------------------------------------*\
+
+ FILE........: thash.c
+ AUTHOR......: David Rowe
+ DATE CREATED: July 2020
+
+ Simple test program for LPCNet API get hash function
+
+\*---------------------------------------------------------------------------*/
+
+#include <stdio.h>
+#include "lpcnet_freedv.h"
+
+int main(void) {
+ printf("%s\n", lpcnet_get_hash());
+ return 0;
+}
+
+
diff --git a/train_direct.sh b/src/train_direct.sh
similarity index 100%
rename from train_direct.sh
rename to src/train_direct.sh
diff --git a/src/train_lpcnet.py b/src/train_lpcnet.py
index 62abbd7..94ab9a8 100755
--- a/src/train_lpcnet.py
+++ b/src/train_lpcnet.py
@@ -35,9 +35,14 @@ from keras.callbacks import ModelCheckpoint
from ulaw import ulaw2lin, lin2ulaw
import keras.backend as K
import h5py
-
+import argparse
+import os
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
+import matplotlib.pyplot as plt
+
+# less verbose tensorflow ....
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
config = tf.ConfigProto()
# use this option to reserve GPU memory, e.g. for running more than
@@ -46,23 +51,38 @@ config = tf.ConfigProto()
set_session(tf.Session(config=config))
-nb_epochs = 10
-
# Try reducing batch_size if you run out of memory on your GPU
batch_size = 32
+# width of feature records used for training
+nb_features = 55
+
+parser = argparse.ArgumentParser(description='LPCNet training')  # command-line interface for the training script
+parser.add_argument('feature_file', help='.f32 file of float features')
+parser.add_argument('packed_ulaw_file', help='file of 4 multiplexed ulaw samples per speech sample')
+parser.add_argument('prefix', help='.h5 file prefix to easily identify each experiment')
+parser.add_argument('--frame_size', type=int, default=160, help='frame size in samples')
+parser.add_argument('--epochs', type=int, default=20, help='Number of training epochs')
+parser.add_argument('--no_pitch_embedding', action='store_true', help='disable pitch embedding')
+parser.add_argument('--load_h5', help='.h5 weights file to load before training (resume a partially trained model)')
+args = parser.parse_args()
-model, _, _ = lpcnet.new_lpcnet_model(training=True)
+nb_epochs = args.epochs
+
+model, _, _ = lpcnet.new_lpcnet_model(frame_size=args.frame_size, training=True)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
model.summary()
-feature_file = sys.argv[1]
-pcm_file = sys.argv[2] # 16 bit unsigned short PCM samples
-prefix = sys.argv[3] # prefix to put on .h5 files to easily name each experiment
+if args.load_h5:
+ print("loading: %s" % (args.load_h5))
+ model.load_weights(args.load_h5)
+
+feature_file = args.feature_file
+pcm_file = args.packed_ulaw_file
+prefix = args.prefix
frame_size = model.frame_size
-nb_features = 55
nb_used_features = model.nb_used_features
-feature_chunk_size = 15
+feature_chunk_size = 15 # time window for conv1d/receptive field
pcm_chunk_size = frame_size*feature_chunk_size
# u for unquantised, load 16 bit PCM samples and convert to mu-law
@@ -84,7 +104,17 @@ in_exc = np.reshape(data[2::4], (nb_frames, pcm_chunk_size, 1))
out_exc = np.reshape(data[3::4], (nb_frames, pcm_chunk_size, 1))
del data
-print("ulaw std = ", np.std(out_exc))
+"""
+# plot ulaw signals to sanity check
+testf=10
+print(sig.shape)
+#plt.plot(sig[testf,:],label="sig")
+#plt.plot(pred[testf,:],label="pred")
+plt.plot(in_exc[testf,:],label="in_exc")
+plt.plot(out_exc[testf,:],label="out_exc")
+plt.legend()
+plt.show()
+"""
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
features = features[:, :, :nb_used_features]
@@ -93,12 +123,34 @@ features = features[:, :, :nb_used_features]
# nb_used_features=38, so 0...37, so lpc-gain not used
features[:,:,18:36] = 0 # zero out 18..35, so pitch and pitch gain being fed in, lpc gain ignored
+"""
+# plot features to sanity check
+print(features.shape)
+testf=10
+plt.plot(features[testf,:,37:38])
+plt.show()
+"""
+
fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)
features = np.concatenate([fpad1, features, fpad2], axis=1)
-# pitch feature uses as well as cesptrals
+# pitch feature uses as well as cepstrals
periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
+print(periods.shape)
+if args.no_pitch_embedding:  # zero every period so the pitch input is constant
+    print("no_pitch_embedding")
+    periods[:] = 0
+else:  # sanity check training data against pitch embedding range (not applicable once periods are zeroed)
+    assert np.all(periods >= 40), "pitch embedding < 40"
+    assert np.all(periods < 256), "pitch embedding > 255"
+
+"""
+# plot pitch to sanity check
+print(features.shape, periods.shape)
+plt.plot(periods.reshape(-1)[:1000])
+plt.show()
+"""
in_data = np.concatenate([sig, pred, in_exc], axis=-1)
@@ -108,9 +160,8 @@ del in_exc
# dump models to disk as we go
#checkpoint = ModelCheckpoint('lpcnet20h_384_10_G16_{epoch:02d}.h5')
-checkpoint = ModelCheckpoint(prefix + '_{epoch:02d}.h5')
+checkpoint = ModelCheckpoint(prefix + '_{epoch:d}.h5')
# use this to reload a partially trained model
-#model.load_weights('lpcnet_190203_07.h5')
model.compile(optimizer=Adam(0.001, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')
-model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.1, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))])
+model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))])
diff --git a/train_pred2.sh b/src/train_pred2.sh
similarity index 100%
rename from train_pred2.sh
rename to src/train_pred2.sh
diff --git a/src/vec_avx.h b/src/vec_avx.h
index 1e58f8d..520b5b2 100644
--- a/src/vec_avx.h
+++ b/src/vec_avx.h
@@ -79,7 +79,7 @@ static __m128 exp4_approx(__m128 X)
Y = _mm_castsi128_ps(_mm_and_si128(mask, _mm_add_epi32(I, _mm_castps_si128(Y))));
return Y;
}
-static __m256 exp8_approx(__m256 X)
+static inline __m256 exp8_approx(__m256 X)
{
__m256 Y;
__m128 Xhi, Xlo, Yhi, Ylo;
diff --git a/src/vec_sse.h b/src/vec_sse.h
new file mode 100644
index 0000000..82ddd42
--- /dev/null
+++ b/src/vec_sse.h
@@ -0,0 +1,211 @@
+/* Copyright (c) 2020 SASANO Takayoshi
+ 2018 David Rowe
+ 2018 Mozilla
+ 2008-2011 Octasic Inc.
+ 2012-2017 Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/*
+ SSE implementation of vector operations, compile with -msse
+ ported from the Arm NEON implementation
+*/
+
+#include <xmmintrin.h>
+
+#ifndef LPCNET_TEST
+static float celt_exp2(float x) /* approximates 2^x: cubic polynomial on the fractional part, integer part added to the float's bit pattern */
+{
+    int integer;
+    float frac;
+    union {
+        float f;
+        opus_uint32 i;
+    } res; /* lets the float result be adjusted through its integer bit pattern */
+    integer = floor(x);
+    if (integer < -50) /* very negative exponents are simply returned as zero */
+        return 0;
+    frac = x-integer; /* 0 <= frac < 1 because integer is floor(x) */
+    /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
+    res.f = 0.99992522f + frac * (0.69583354f
+            + frac * (0.22606716f + 0.078024523f*frac));
+    res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff; /* shift as unsigned: << on a negative int is undefined behaviour; the mask clears the sign bit */
+    return res.f;
+}
+#define celt_exp_sse(x) celt_exp2((x)*1.44269504f)
+
+static float tansig_approx(float x)
+{
+ int i;
+ float y, dy;
+ float sign=1;
+ /* Tests are reversed to catch NaNs */
+ if (!(x<8))
+ return 1;
+ if (!(x>-8))
+ return -1;
+#ifndef FIXED_POINT
+ /* Another check in case of -ffast-math */
+ if (celt_isnan(x))
+ return 0;
+#endif
+ if (x<0)
+ {
+ x=-x;
+ sign=-1;
+ }
+ i = (int)floor(.5f+25*x);
+ x -= .04f*i;
+ y = tansig_table[i];
+ dy = 1-y*y;
+ y = y + x*dy*(1 - y*x);
+ return sign*y;
+}
+
+static OPUS_INLINE float sigmoid_approx(float x)
+{
+ return .5f + .5f*tansig_approx(.5f*x);
+}
+
+static void softmax(float *y, const float *x, int N)
+{
+ int i;
+ for (i=0;i<N;i++)
+ y[i] = celt_exp_sse(x[i]);
+}
+
+static void vec_tanh(float *y, const float *x, int N)
+{
+ int i;
+ for (i=0;i<N;i++)
+ {
+ y[i] = tansig_approx(x[i]);
+ }
+}
+
+static void vec_sigmoid(float *y, const float *x, int N)
+{
+ int i;
+ for (i=0;i<N;i++)
+ {
+ y[i] = sigmoid_approx(x[i]);
+ }
+}
+#endif
+
+static void sgemv_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
+{
+ int i, j;
+ for (i=0;i<rows;i+=16)
+ {
+ float * restrict y = &out[i];
+
+ /* keep y[0..15] in registers for duration of inner loop */
+
+ __m128 y0_3 = _mm_loadu_ps(&y[0]);
+ __m128 y4_7 = _mm_loadu_ps(&y[4]);
+ __m128 y8_11 = _mm_loadu_ps(&y[8]);
+ __m128 y12_15 = _mm_loadu_ps(&y[12]);
+
+ for (j=0;j<cols;j++)
+ {
+ const float * restrict w;
+ __m128 wvec0_3, wvec4_7, wvec8_11, wvec12_15;
+ __m128 xj = _mm_set1_ps(x[j]);
+
+ w = &weights[j*col_stride + i];
+
+ wvec0_3 = _mm_loadu_ps(&w[0]);
+ wvec4_7 = _mm_loadu_ps(&w[4]);
+ wvec8_11 = _mm_loadu_ps(&w[8]);
+ wvec12_15 = _mm_loadu_ps(&w[12]);
+
+ wvec0_3 = _mm_mul_ps(wvec0_3, xj);
+ wvec4_7 = _mm_mul_ps(wvec4_7, xj);
+ wvec8_11 = _mm_mul_ps(wvec8_11, xj);
+ wvec12_15 = _mm_mul_ps(wvec12_15, xj);
+
+ y0_3 = _mm_add_ps(y0_3, wvec0_3);
+ y4_7 = _mm_add_ps(y4_7, wvec4_7);
+ y8_11 = _mm_add_ps(y8_11, wvec8_11);
+ y12_15 = _mm_add_ps(y12_15, wvec12_15);
+ }
+
+ /* save y[0..15] back to memory */
+
+ _mm_storeu_ps(&y[0], y0_3);
+ _mm_storeu_ps(&y[4], y4_7);
+ _mm_storeu_ps(&y[8], y8_11);
+ _mm_storeu_ps(&y[12], y12_15);
+ }
+}
+
+static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int *idx, const float *x)
+{
+ int i, j;
+ for (i=0;i<rows;i+=16)
+ {
+ int cols;
+ cols = *idx++;
+ float * restrict y = &out[i];
+
+ /* keep y[0..15] in registers for duration of inner loop */
+
+ __m128 y0_3 = _mm_loadu_ps(&y[0]);
+ __m128 y4_7 = _mm_loadu_ps(&y[4]);
+ __m128 y8_11 = _mm_loadu_ps(&y[8]);
+ __m128 y12_15 = _mm_loadu_ps(&y[12]);
+
+ for (j=0;j<cols;j++)
+ {
+ __m128 wvec;
+ __m128 xj = _mm_set1_ps(x[*idx++]);
+
+ wvec = _mm_loadu_ps(&w[0]);
+ wvec = _mm_mul_ps(wvec, xj);
+ y0_3 = _mm_add_ps(y0_3, wvec);
+
+ wvec = _mm_loadu_ps(&w[4]);
+ wvec = _mm_mul_ps(wvec, xj);
+ y4_7 = _mm_add_ps(y4_7, wvec);
+
+ wvec = _mm_loadu_ps(&w[8]);
+ wvec = _mm_mul_ps(wvec, xj);
+ y8_11 = _mm_add_ps(y8_11, wvec);
+
+ wvec = _mm_loadu_ps(&w[12]);
+ wvec = _mm_mul_ps(wvec, xj);
+ y12_15 = _mm_add_ps(y12_15, wvec);
+
+ w += 16;
+ }
+
+ /* save y[0..15] back to memory */
+
+ _mm_storeu_ps(&y[0], y0_3);
+ _mm_storeu_ps(&y[4], y4_7);
+ _mm_storeu_ps(&y[8], y8_11);
+ _mm_storeu_ps(&y[12], y12_15);
+ }
+}
diff --git a/train_pred1.sh b/train_pred1.sh
deleted file mode 100755
index 3694252..0000000
--- a/train_pred1.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/sh -x
-# train_pred2.sh
-# David Rowe Jan 2019
-# Train multi-stage VQ for LPCNet
-
-PATH=$PATH:/home/david/codec2-dev/build_linux/misc/
-
-if [ $# -lt 1 ]; then
- echo "usage: ./train_pred1.sh [-w] VQprefix"
- echo " $ ./train_pred1.sh pred1_v1"
- exit 1
-fi
-
-VQ_NAME=$1
-echo $VQ_NAME
-
-K=18
-STOP=1E-2
-
-echo "*********"
-echo "Pred 1"
-echo "*********"
-echo "weighting dctLy[0] ...."
-t=$(mktemp)
-extract all_speech_features.f32 $t 0 17 10 1.0 1
-cat $t | ./weight > $VQ_NAME'_s0.f32'
-vqtrain $VQ_NAME'_s0.f32' $K 2048 $VQ_NAME'_stage1.f32' -r $VQ_NAME'_s1.f32' -s $STOP
-vqtrain $VQ_NAME'_s1.f32' $K 2048 $VQ_NAME'_stage2.f32' -r $VQ_NAME'_s2.f32' -s $STOP
-vqtrain $VQ_NAME'_s2.f32' $K 2048 $VQ_NAME'_stage3.f32' -r $VQ_NAME'_s3.f32' -s $STOP
-vqtrain $VQ_NAME'_s3.f32' $K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s4.f32' -s $STOP
-
diff --git a/unittest/test_core_nn.sh b/unittest/test_core_nn.sh
index 392c897..cd955c7 100755
--- a/unittest/test_core_nn.sh
+++ b/unittest/test_core_nn.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash -x
# test_core_nn.sh
#
@@ -60,7 +60,7 @@ if [ ! -z $SYNTH_mag ]; then
../build_linux/src/dump_data --mag --test --c2pitch ../wav/c01_01.wav c01_01.f32
diff c01_01_mag.f32 c01_01.f32 || { echo "ERROR in synth .f32 output! Exiting..."; exit 1; }
echo "mag .f32 OK"
- ../build_linux/src/test_lpcnet --mag -n lpcnet_190804a.f32 c01_01.f32 c01_01_out.raw
+ ../build_linux/src/test_lpcnet --mag 1 -n lpcnet_190804a.f32 c01_01.f32 c01_01_out.raw
diff c01_01_190804a_targ.raw c01_01_out.raw || { echo "ERROR in synth .raw output! Exiting..."; exit 1; }
echo "mag .raw OK"
fi