You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1487 lines
47 KiB
1487 lines
47 KiB
diff --git a/.travis.yml b/.travis.yml
|
|
index fb795aa..e92c398 100644
|
|
--- a/.travis.yml
|
|
+++ b/.travis.yml
|
|
@@ -29,7 +29,7 @@ script:
|
|
- cd src && sox ../../wav/wia.wav -t raw -r 16000 - | ./lpcnet_enc -s | ./lpcnet_dec -s > /dev/null
|
|
# some LPCNet ctests
|
|
- ls -l
|
|
- - cd $BUILDDIR && ctest
|
|
+ - cd $BUILDDIR && ctest --output-on-failure
|
|
# Re-build codec2 with LPCNet and test FreeDV 2020 support
|
|
- cd $CODEC2DIR/build_linux
|
|
- make clean
|
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
index 680f52c..1d5b623 100644
|
|
--- a/CMakeLists.txt
|
|
+++ b/CMakeLists.txt
|
|
@@ -10,6 +10,7 @@ project(LPCNet C)
|
|
option(DISABLE_CPU_OPTIMIZATION "Disable CPU optimization discovery." OFF)
|
|
option(AVX2 "Enable AVX2 CPU optimizations." OFF)
|
|
option(AVX "Enable AVX CPU optimizations." OFF)
|
|
+option(SSE "Enable SSE CPU optimizations." OFF)
|
|
option(NEON "Enable NEON CPU optimizations for RPi." OFF)
|
|
|
|
include(GNUInstallDirs)
|
|
@@ -19,6 +20,11 @@ mark_as_advanced(CLEAR
|
|
CMAKE_INSTALL_LIBDIR
|
|
)
|
|
|
|
+# Build universal ARM64 and x86_64 binaries on Mac.
|
|
+if(BUILD_OSX_UNIVERSAL)
|
|
+set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64")
|
|
+endif(BUILD_OSX_UNIVERSAL)
|
|
+
|
|
#
|
|
# Prevent in-source builds
|
|
# If an in-source build is attempted, you will still need to clean up a few
|
|
@@ -43,15 +49,41 @@ set(LPCNET_VERSION_MINOR 2)
|
|
set(LPCNET_VERSION_PATCH FALSE)
|
|
set(LPCNET_VERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}")
|
|
# Patch level version bumps should not change API/ABI.
|
|
-set(SOVERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}")
|
|
+set(LPCNET_SOVERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}")
|
|
if(LPCNET_VERSION_PATCH)
|
|
set(LPCNET_VERSION "${LPCNET_VERSION}.${LPCNET_VERSION_PATCH}")
|
|
endif()
|
|
message(STATUS "LPCNet version: ${LPCNET_VERSION}")
|
|
|
|
+#
|
|
+# Find the git hash if this is a working copy.
|
|
+#
|
|
+if(EXISTS ${CMAKE_SOURCE_DIR}/.git)
|
|
+ find_package(Git QUIET)
|
|
+ if(Git_FOUND)
|
|
+ execute_process(
|
|
+ COMMAND "${GIT_EXECUTABLE}" describe --always HEAD
|
|
+ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
|
+ RESULT_VARIABLE res
|
|
+ OUTPUT_VARIABLE FREEDV_HASH
|
|
+ ERROR_QUIET
|
|
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
|
|
+ message(STATUS "freedv-gui current git hash: ${FREEDV_HASH}")
|
|
+ add_definitions(-DGIT_HASH="${FREEDV_HASH}")
|
|
+ else()
|
|
+ message(WARNING "Git not found. Can not determine current commit hash.")
|
|
+ add_definitions(-DGIT_HASH="Unknown")
|
|
+ endif()
|
|
+else()
|
|
+ add_definitions(-DGIT_HASH="None")
|
|
+endif()
|
|
+
|
|
# Set default flags
|
|
set(CMAKE_C_FLAGS "-Wall -W -Wextra -Wno-unused-function -O3 -g -I. -MD ${CMAKE_C_FLAGS} -DENABLE_ASSERTIONS")
|
|
|
|
+# Arch specific stuff here
|
|
+message(STATUS "Host system arch is: ${CMAKE_SYSTEM_PROCESSOR}")
|
|
+
|
|
# Detection of available CPU optimizations
|
|
if(NOT DISABLE_CPU_OPTIMIZATION)
|
|
if(UNIX AND NOT APPLE)
|
|
@@ -60,15 +92,25 @@ if(NOT DISABLE_CPU_OPTIMIZATION)
|
|
OUTPUT_VARIABLE AVX2)
|
|
execute_process(COMMAND grep -c "avx " /proc/cpuinfo
|
|
OUTPUT_VARIABLE AVX)
|
|
+ execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo
|
|
+ OUTPUT_VARIABLE SSE)
|
|
execute_process(COMMAND grep -c "neon" /proc/cpuinfo
|
|
OUTPUT_VARIABLE NEON)
|
|
elseif(APPLE)
|
|
- # Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
|
|
- message(STATUS "Looking for available CPU optimizations on an OSX system...")
|
|
- execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
|
|
- OUTPUT_VARIABLE AVX2)
|
|
- execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
|
|
- OUTPUT_VARIABLE AVX)
|
|
+ if(BUILD_OSX_UNIVERSAL)
|
|
+ # Presume AVX/AVX2 are enabled on the x86 side. The ARM side will auto-enable
|
|
+ # NEON optimizations by virtue of being aarch64.
|
|
+ set(AVX TRUE)
|
|
+ set(AVX2 TRUE)
|
|
+ set(SSE TRUE)
|
|
+ else()
|
|
+ # Under OSX we need to look through a few sysctl entries to determine what our CPU supports.
|
|
+ message(STATUS "Looking for available CPU optimizations on an OSX system...")
|
|
+ execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2
|
|
+ OUTPUT_VARIABLE AVX2)
|
|
+ execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX
|
|
+ OUTPUT_VARIABLE AVX)
|
|
+ endif(BUILD_OSX_UNIVERSAL)
|
|
elseif(WIN32)
|
|
message(STATUS "No detection capability on Windows, assuming AVX is available.")
|
|
set(AVX TRUE)
|
|
@@ -85,9 +127,13 @@ elseif(${AVX} OR ${AVX} GREATER 0)
|
|
# AVX2 machines will also match on AVX
|
|
message(STATUS "avx processor flags found or enabled.")
|
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
|
|
+elseif(${SSE} OR ${SSE} GREATER 0)
|
|
+# AVX and AVX2 machines will also match on SSE
|
|
+ message(STATUS "sse processor flags found or enabled.")
|
|
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
|
|
endif()
|
|
|
|
-# RPi
|
|
+# RPi / ARM 32bit
|
|
if(${NEON} OR ${NEON} GREATER 0)
|
|
message(STATUS "neon processor flags found or enabled.")
|
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=armv8-a -mtune=cortex-a53")
|
|
diff --git a/README.md b/README.md
|
|
index c446450..5b72d8c 100644
|
|
--- a/README.md
|
|
+++ b/README.md
|
|
@@ -25,14 +25,22 @@ LPCNet at 1733 bits/s using direct-split quantiser:
|
|
```
|
|
sox ../../wav/wia.wav -t raw -r 16000 - | ./lpcnet_enc -s | ./lpcnet_dec -s | aplay -f S16_LE -r 16000
|
|
```
|
|
-# CTests
|
|
+
|
|
+## Manually Selecting SIMD Technology
|
|
+
|
|
+CMake will select the fastest SIMD available (AVX/SSE/None); however, you can manually select one, e.g.:
|
|
+```
|
|
+cmake -DDISABLE_CPU_OPTIMIZATION=ON -DSSE=ON -DCODEC2_BUILD_DIR=~/codec2/build_linux ..
|
|
+```
|
|
+
|
|
+## CTests
|
|
|
|
```
|
|
$ cd ~/LPCNet/build_linux
|
|
$ ctest
|
|
```
|
|
|
|
-Note, due to precision/library issues several tests (1-3) will only pass on certain machines such as Ubuntu 16 and 18, Ubuntu 17 is known to fail.
|
|
+Note, due to precision/library issues several tests (1-3) will [only pass on some machines](https://github.com/drowe67/LPCNet/issues/17).
|
|
|
|
# Reading Further
|
|
|
|
diff --git a/src/700c_train.sh b/src/700c_train.sh
|
|
new file mode 100755
|
|
index 0000000..3be057e
|
|
--- /dev/null
|
|
+++ b/src/700c_train.sh
|
|
@@ -0,0 +1,73 @@
|
|
+#!/bin/bash -x
|
|
+# 700c_train.sh
|
|
+# David Rowe March 2020
|
|
+# Experiments in LPCNet decoding of Codec 2 700C
|
|
+
|
|
+PATH=$HOME/codec2/build_linux/src:$HOME/LPCNet/build_linux/src:$HOME/LPCNet/src:$PATH
|
|
+
|
|
+if [ "$#" -ne 1 ]; then
|
|
+ echo "usage: ./700c_train.sh datestamp"
|
|
+ echo " ./700c_train.sh 200404"
|
|
+ exit 0
|
|
+fi
|
|
+
|
|
+train1=dev-clean-8k
|
|
+test1=test-clean-8k
|
|
+test2=all_speech_subset_8k
|
|
+test3=all_8k
|
|
+datestamp=$1
|
|
+epochs=30
|
|
+log=${1}.txt
|
|
+train=${datestamp}_train
|
|
+
|
|
+# synth "c2sim arg for experiment" "experiment label" "filename"
|
|
+synth() {
|
|
+ test=$3
|
|
+ c2sim ~/Downloads/${test}.sw --rateKWov ${test}.f32 ${1}
|
|
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test}.f32 ${datestamp}_${test}_${2}.sw
|
|
+}
|
|
+
|
|
+# experiment "c2sim arg for experiment" "experiment label"
|
|
+experiment() {
|
|
+ echo "------------------------------------------------------------------------------"
|
|
+ echo "train starting" ${2}
|
|
+ echo "------------------------------------------------------------------------------"
|
|
+
|
|
+ c2sim ${train}.sw --ten_ms_centre ${train}_10ms.sw --rateKWov ${train}.f32 ${1}
|
|
+ sw2packedulaw --frame_size 80 ${train}_10ms.sw ${train}.f32 ${train}_10ms.pulaw
|
|
+
|
|
+ train_lpcnet.py ${train}.f32 ${train}_10ms.pulaw ${datestamp}_${2} --epochs ${epochs} --frame_size 80
|
|
+
|
|
+ dump_lpcnet.py ${datestamp}_${2}_${epochs}.h5
|
|
+ cp nnet_data.c src
|
|
+ make test_lpcnet
|
|
+
|
|
+ synth "${1}" "${2}" "${test1}"
|
|
+ synth "${1}" "${2}" "${test2}"
|
|
+ synth "${1}" "${2}" "${test3}"
|
|
+}
|
|
+
|
|
+rm -f $log
|
|
+
|
|
+(
|
|
+ date
|
|
+
|
|
+ # assemble some training speech
|
|
+ sox -r 8000 -c 1 ~/Downloads/${train1}.sw \
|
|
+ -t sw -r 8000 -c 1 ${train}.sw
|
|
+
|
|
+ # LPCNet with 10ms frames (similar to training data)
|
|
+ experiment "" "none"
|
|
+
|
|
+ # Codec 2 700C at 40ms frame rate (700 bits/s) from c2dec
|
|
+ c2enc 700C ~/Downloads/${test1}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test1}_dec4.f32
|
|
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test1}_dec4.f32 ${datestamp}_${test1}_40.sw
|
|
+ c2enc 700C ~/Downloads/${test2}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test2}_dec4.f32
|
|
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test2}_dec4.f32 ${datestamp}_${test2}_40.sw
|
|
+ c2enc 700C ~/Downloads/${test3}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test3}_dec4.f32
|
|
+ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test3}_dec4.f32 ${datestamp}_${test3}_40.sw
|
|
+
|
|
+ date
|
|
+) |& tee $log
|
|
+
|
|
+
|
|
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
|
|
index 41a78dc..0df4672 100644
|
|
--- a/src/CMakeLists.txt
|
|
+++ b/src/CMakeLists.txt
|
|
@@ -23,6 +23,8 @@ add_library(lpcnetfreedv SHARED ${lpcnet_freedv_srcs})
|
|
target_link_libraries(lpcnetfreedv codec2)
|
|
set_target_properties(lpcnetfreedv PROPERTIES
|
|
PUBLIC_HEADER lpcnet_freedv.h
|
|
+ VERSION ${LPCNET_VERSION}
|
|
+ SOVERSION ${LPCNET_SOVERSION}
|
|
)
|
|
target_include_directories(lpcnetfreedv INTERFACE
|
|
$<INSTALL_INTERFACE:include/lpcnet>
|
|
@@ -49,11 +51,11 @@ target_link_libraries(dump_data lpcnetfreedv m codec2)
|
|
add_executable(test_lpcnet test_lpcnet.c)
|
|
target_link_libraries(test_lpcnet lpcnetfreedv m codec2)
|
|
|
|
-if(AVX OR AVX2)
|
|
+if(SSE OR AVX OR AVX2 OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
|
add_executable(test_vec test_vec.c)
|
|
target_link_libraries(test_vec m)
|
|
else()
|
|
- message(WARNING "No AVX/AVX2 CPU flags identified, not building test_vec.")
|
|
+ message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.")
|
|
endif()
|
|
|
|
add_executable(quant_feat quant_feat.c)
|
|
@@ -98,6 +100,12 @@ target_link_libraries(idct lpcnetfreedv m codec2)
|
|
add_executable(nnet2f32 nnet2f32.c)
|
|
target_link_libraries(nnet2f32 lpcnetfreedv m)
|
|
|
|
+add_executable(sw2packedulaw sw2packedulaw.c)
|
|
+target_link_libraries(sw2packedulaw lpcnetfreedv m)
|
|
+
|
|
+add_executable(thash thash.c)
|
|
+target_link_libraries(thash lpcnetfreedv m)
|
|
+
|
|
install(TARGETS lpcnet_enc lpcnet_dec
|
|
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
|
)
|
|
diff --git a/src/codec2_pitch.c b/src/codec2_pitch.c
|
|
index a267785..55fb5bc 100644
|
|
--- a/src/codec2_pitch.c
|
|
+++ b/src/codec2_pitch.c
|
|
@@ -113,6 +113,7 @@ int codec2_pitch_est(CODEC2_PITCH *pitch, float Sn[], float *f0, float *voicing)
|
|
|
|
void codec2_pitch_destroy(CODEC2_PITCH *pitch)
|
|
{
|
|
+ free(pitch->fft_fwd_cfg);
|
|
nlp_destroy(pitch->nlp_states);
|
|
free(pitch->w);
|
|
free(pitch);
|
|
diff --git a/src/concat.sh b/src/concat.sh
|
|
old mode 100644
|
|
new mode 100755
|
|
index 8369117..d98ccda
|
|
--- a/src/concat.sh
|
|
+++ b/src/concat.sh
|
|
@@ -1,6 +1,8 @@
|
|
-# Place in 16k-LP7 from TSPSpeech.iso and run to concatenate wave files
|
|
-# into one headerless training file
|
|
-for i in */*.wav
|
|
+#!/bin/bash
|
|
+# Concatenate .wav files into one headerless .sw training file
|
|
+# usage: ./concat.sh concatfile.sw
|
|
+
|
|
+for i in `find . -name '*.wav'`
|
|
do
|
|
sox $i -r 16000 -c 1 -t sw -
|
|
-done > input.s16
|
|
+done > $1
|
|
diff --git a/src/dump_data.c b/src/dump_data.c
|
|
index cd936cf..4e8d3c4 100644
|
|
--- a/src/dump_data.c
|
|
+++ b/src/dump_data.c
|
|
@@ -453,6 +453,7 @@ int main(int argc, char **argv) {
|
|
assert(pitch_index < 2*PITCH_MAX_PERIOD);
|
|
assert(pitch_index >= 2*PITCH_MIN_PERIOD);
|
|
features[2*NB_BANDS] = 0.01*(pitch_index-200);
|
|
+ //fprintf(stderr, "count: %d [36] %f pitch_index: %d\n", count, features[36], pitch_index);
|
|
if (c2voicing_en) features[2*NB_BANDS+1] = voicing;
|
|
}
|
|
fwrite(features, sizeof(float), NB_FEATURES, ffeat);
|
|
diff --git a/ext_pitch.sh b/src/ext_pitch.sh
|
|
similarity index 100%
|
|
rename from ext_pitch.sh
|
|
rename to src/ext_pitch.sh
|
|
diff --git a/src/flac_to_wav.sh b/src/flac_to_wav.sh
|
|
new file mode 100755
|
|
index 0000000..8f8aa29
|
|
--- /dev/null
|
|
+++ b/src/flac_to_wav.sh
|
|
@@ -0,0 +1,10 @@
|
|
+#!/bin/bash
|
|
+# Convert all .flac files under this folder to .wav files
|
|
+# source: several GitHub repos
|
|
+
|
|
+find . -iname "*.flac" | wc
|
|
+
|
|
+for flacfile in `find . -iname "*.flac"`
|
|
+do
|
|
+ ffmpeg -y -f flac -i $flacfile -ab 64k -ac 1 -ar 16000 -f wav "${flacfile%.*}.wav"
|
|
+done
|
|
diff --git a/src/freq.c b/src/freq.c
|
|
index c88d071..dbe94d9 100644
|
|
--- a/src/freq.c
|
|
+++ b/src/freq.c
|
|
@@ -140,6 +140,13 @@ static void check_init() {
|
|
common.init = 1;
|
|
}
|
|
|
|
+void freq_close() {
|
|
+ if (common.init) {
|
|
+ opus_fft_free(common.kfft,0);
|
|
+ common.init = 0;
|
|
+ }
|
|
+}
|
|
+
|
|
void dct(float *out, const float *in) {
|
|
int i;
|
|
check_init();
|
|
diff --git a/src/freq.h b/src/freq.h
|
|
index 0316edd..314eabd 100644
|
|
--- a/src/freq.h
|
|
+++ b/src/freq.h
|
|
@@ -42,6 +42,7 @@
|
|
|
|
#define NB_BANDS 18
|
|
|
|
+void freq_close(void);
|
|
void compute_band_energy(float *bandE, const kiss_fft_cpx *X);
|
|
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P);
|
|
|
|
diff --git a/src/lpcnet.c b/src/lpcnet.c
|
|
index e117f1c..9f3f059 100644
|
|
--- a/src/lpcnet.c
|
|
+++ b/src/lpcnet.c
|
|
@@ -54,8 +54,10 @@ struct LPCNetState {
|
|
float old_lpc[FEATURES_DELAY][LPC_ORDER];
|
|
float old_gain[FEATURES_DELAY];
|
|
int frame_count;
|
|
+ float preemph;
|
|
float deemph_mem;
|
|
- FILE *ftest; /* used to dump states for automates tests */
|
|
+ int pitch_embedding;
|
|
+ FILE *ftest; /* used to dump states for automated tests */
|
|
};
|
|
|
|
|
|
@@ -118,6 +120,8 @@ LPCNetState *lpcnet_create()
|
|
lpcnet = (LPCNetState *)calloc(sizeof(LPCNetState), 1);
|
|
lpcnet->last_exc = 128;
|
|
lpcnet->ftest = NULL;
|
|
+ lpcnet->preemph = PREEMPH;
|
|
+ lpcnet->pitch_embedding = 1;
|
|
return lpcnet;
|
|
}
|
|
|
|
@@ -135,7 +139,15 @@ void lpcnet_open_test_file(LPCNetState *lpcnet, char file_name[]) {
|
|
}
|
|
}
|
|
|
|
-void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features, int N, int logmag)
|
|
+void lpcnet_set_preemph(LPCNetState *lpcnet, float preemph) {
|
|
+ lpcnet->preemph = preemph;
|
|
+}
|
|
+
|
|
+void lpcnet_set_pitch_embedding(LPCNetState *lpcnet, int val) {
|
|
+ lpcnet->pitch_embedding = val;
|
|
+}
|
|
+
|
|
+void lpcnet_synthesize(LPCNetState *lpcnet, short *output, float *features, int N, int mag)
|
|
{
|
|
static int count = 0;
|
|
int i;
|
|
@@ -149,13 +161,19 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features
|
|
static int start = 0; /*(LPC_ORDER+1*/;
|
|
/* FIXME: Do proper rounding once the Python code rounds properly. */
|
|
|
|
- pitch = (int)floor(.1 + 50*features[36]+100);
|
|
- assert(pitch >=0); assert(pitch <= 255);
|
|
- /* latest networks (using the codec 2 pitch estimator) are trained
|
|
- with pitch estimates between 40 and 255, but due to the pitch
|
|
- quantiser design and bit errors it's possible to get pitch
|
|
- values down to 32, which upsets the pitch embed matrix */
|
|
- if (pitch < 40) pitch = 40;
|
|
+ if (lpcnet->pitch_embedding) {
|
|
+ pitch = (int)floor(.1 + 50*features[36]+100);
|
|
+ //fprintf(stderr, "count: %d [36] %f pitch: %d\n", lpcnet->frame_count, features[36], pitch);
|
|
+ assert(pitch >=0); assert(pitch <= 255);
|
|
+ /* latest networks (using the codec 2 pitch estimator) are trained
|
|
+ with pitch estimates between 40 and 255, but due to the pitch
|
|
+ quantiser design and bit errors it's possible to get pitch
|
|
+ values down to 32, which upsets the pitch embed matrix */
|
|
+ if (pitch < 40) pitch = 40;
|
|
+ }
|
|
+ else {
|
|
+ pitch = 0;
|
|
+ }
|
|
|
|
pitch_gain = lpcnet->old_gain[FEATURES_DELAY-1];
|
|
memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
|
|
@@ -164,13 +182,30 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features
|
|
memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
|
|
memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
|
|
|
|
- if (logmag) {
|
|
- float tmp[NB_BANDS];
|
|
+ switch (mag) {
|
|
+ case 0:
|
|
+ lpc_from_cepstrum(lpcnet->old_lpc[0], features);
|
|
+ break;
|
|
+ case 1:
|
|
+ {
|
|
+ float tmp[NB_BANDS];
|
|
for (i=0;i<NB_BANDS;i++) tmp[i] = pow(10.f, features[i]);
|
|
lpc_from_bands(lpcnet->old_lpc[0], tmp);
|
|
}
|
|
- else
|
|
- lpc_from_cepstrum(lpcnet->old_lpc[0], features);
|
|
+ break;
|
|
+ case 2:
|
|
+ for (i=0;i<LPC_ORDER;i++) {
|
|
+ lpcnet->old_lpc[0][i] = features[i+NB_BANDS];
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ assert(0);
|
|
+ }
|
|
+
|
|
+ /* We optionally use this part of feature vector to pass in LPCs,
|
|
+ * but we don't want any non zero values here hitting the
|
|
+ * frame rate network. TODO: better design */
|
|
+ RNN_CLEAR(&features[18], 18);
|
|
|
|
if (lpcnet->ftest) {
|
|
float pitch_f = pitch;
|
|
@@ -220,7 +255,7 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features
|
|
RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
|
|
lpcnet->last_sig[0] = pcm;
|
|
lpcnet->last_exc = exc;
|
|
- pcm += PREEMPH*lpcnet->deemph_mem;
|
|
+ pcm += lpcnet->preemph*lpcnet->deemph_mem;
|
|
lpcnet->deemph_mem = pcm;
|
|
if (pcm<-32767) pcm = -32767;
|
|
if (pcm>32767) pcm = 32767;
|
|
diff --git a/src/lpcnet.h b/src/lpcnet.h
|
|
index 70e849e..bd98a37 100644
|
|
--- a/src/lpcnet.h
|
|
+++ b/src/lpcnet.h
|
|
@@ -34,8 +34,10 @@
|
|
typedef struct LPCNetState LPCNetState;
|
|
LPCNetState *lpcnet_create();
|
|
void lpcnet_destroy(LPCNetState *lpcnet);
|
|
-void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features, int N, int logmag);
|
|
+void lpcnet_synthesize(LPCNetState *lpcnet, short *output, float *features, int N, int logmag);
|
|
|
|
void lpcnet_open_test_file(LPCNetState *lpcnet, char file_name[]);
|
|
+void lpcnet_set_preemph(LPCNetState *lpcnet, float preemph);
|
|
+void lpcnet_set_pitch_embedding(LPCNetState *lpcnet, int val);
|
|
|
|
#endif
|
|
diff --git a/src/lpcnet.py b/src/lpcnet.py
|
|
index 010f478..960e8c8 100644
|
|
--- a/src/lpcnet.py
|
|
+++ b/src/lpcnet.py
|
|
@@ -36,7 +36,6 @@ import numpy as np
|
|
import h5py
|
|
import sys
|
|
|
|
-frame_size = 160
|
|
pcm_bits = 8
|
|
embed_size = 128
|
|
pcm_levels = 2**pcm_bits
|
|
@@ -113,7 +112,7 @@ class PCMInit(Initializer):
|
|
'seed': self.seed
|
|
}
|
|
|
|
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, use_gpu=True):
|
|
+def new_lpcnet_model(frame_size = 160, rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, use_gpu=True):
|
|
pcm = Input(shape=(None, 3))
|
|
feat = Input(shape=(None, nb_used_features))
|
|
pitch = Input(shape=(None, 1))
|
|
diff --git a/src/lpcnet_dump.c b/src/lpcnet_dump.c
|
|
index d8a8409..58f9c98 100644
|
|
--- a/src/lpcnet_dump.c
|
|
+++ b/src/lpcnet_dump.c
|
|
@@ -87,7 +87,8 @@ static DenoiseState *rnnoise_create() {
|
|
}
|
|
|
|
static void rnnoise_destroy(DenoiseState *st) {
|
|
- free(st);
|
|
+ freq_close();
|
|
+ free(st);
|
|
}
|
|
|
|
static short float2short(float x)
|
|
diff --git a/src/lpcnet_freedv.c b/src/lpcnet_freedv.c
|
|
index 823fcdc..fe154ea 100644
|
|
--- a/src/lpcnet_freedv.c
|
|
+++ b/src/lpcnet_freedv.c
|
|
@@ -80,3 +80,9 @@ void lpcnet_dec(LPCNetFreeDV *lf, char *frame, short* pcm)
|
|
|
|
int lpcnet_samples_per_frame(LPCNetFreeDV *lf) { return FRAME_SIZE*lf->q->dec; }
|
|
int lpcnet_bits_per_frame(LPCNetFreeDV *lf) { return lf->q->bits_per_frame; }
|
|
+
|
|
+static char git_hash[] = GIT_HASH;
|
|
+char *lpcnet_get_hash(void) {
|
|
+ return git_hash;
|
|
+}
|
|
+
|
|
diff --git a/src/lpcnet_freedv.h b/src/lpcnet_freedv.h
|
|
index 43c8298..874f7cc 100644
|
|
--- a/src/lpcnet_freedv.h
|
|
+++ b/src/lpcnet_freedv.h
|
|
@@ -8,6 +8,10 @@
|
|
#ifndef __LPCNET_FREEDV__
|
|
#define __LPCNET_FREEDV__
|
|
|
|
+#ifdef __cplusplus
|
|
+ extern "C" {
|
|
+#endif
|
|
+
|
|
typedef struct LPCNetFreeDV LPCNetFreeDV;
|
|
|
|
LPCNetFreeDV* lpcnet_freedv_create(int direct_split);
|
|
@@ -16,5 +20,10 @@ void lpcnet_enc(LPCNetFreeDV *lf, short *pcm, char *frame);
|
|
void lpcnet_dec(LPCNetFreeDV *lf, char *frame, short* pcm);
|
|
int lpcnet_bits_per_frame(LPCNetFreeDV *lf);
|
|
int lpcnet_samples_per_frame(LPCNetFreeDV *lf);
|
|
+char *lpcnet_get_hash(void);
|
|
+
|
|
+#ifdef __cplusplus
|
|
+}
|
|
+#endif
|
|
|
|
#endif
|
|
diff --git a/src/nnet.c b/src/nnet.c
|
|
index 8ad4a26..1da7d70 100644
|
|
--- a/src/nnet.c
|
|
+++ b/src/nnet.c
|
|
@@ -43,7 +43,9 @@
|
|
|
|
#ifdef __AVX__
|
|
#include "vec_avx.h"
|
|
-#elif __ARM_NEON__
|
|
+#elif __SSE__
|
|
+#include "vec_sse.h"
|
|
+#elif __ARM_NEON__ || __aarch64__
|
|
#include "vec_neon.h"
|
|
#else
|
|
#warning Compiling without any vectorization. This code will be very slow
|
|
diff --git a/src/plot_lpc.m b/src/plot_lpc.m
|
|
new file mode 100644
|
|
index 0000000..3b814be
|
|
--- /dev/null
|
|
+++ b/src/plot_lpc.m
|
|
@@ -0,0 +1,50 @@
|
|
+% plot_lpc.m
|
|
+% David Rowe April 2020
|
|
+%
|
|
+% Visualise LPC spectra for 700C decoder experiments
|
|
+
|
|
+Fs = 8000; % speech sample rate
|
|
+Fsf = 100; % frame sample rate
|
|
+nb_features = 55;
|
|
+nb_rateK = 18; % number of rateK (log amplitude) features
|
|
+nb_lpc = 10; % number of LPCs
|
|
+
|
|
+function plot_against_time(v, st_sec, en_sec, Fs, leg='b')
|
|
+ st = Fs*st_sec; en = Fs*en_sec;
|
|
+ t = st_sec:1/Fs:en_sec;
|
|
+ plot(t,v(st+1:en+1),leg);
|
|
+endfunction
|
|
+
|
|
+function mesh_against_time(m, st_sec, en_sec, Fs)
|
|
+ st = Fs*st_sec; en = Fs*en_sec;
|
|
+ t = st_sec:1/Fs:en_sec;
|
|
+ mesh(m(st+1:en+1,:));
|
|
+endfunction
|
|
+
|
|
+function mesh_aks_against_time(aks, st_sec, en_sec, Fs)
|
|
+ st = Fs*st_sec; en = Fs*en_sec;
|
|
+ t = st_sec:1/Fs:en_sec;
|
|
+ aks = aks(st+1:en+1,:); A = [];
|
|
+ for f=1:length(aks)
|
|
+ A = [A freqz(1,[1 aks(f,:)],64)];
|
|
+ end
|
|
+ AdB = 20*log10(abs(A));
|
|
+ max(AdB(:))
|
|
+ mesh(AdB);
|
|
+endfunction
|
|
+
|
|
+# plots of speech (input), rateK vectors, LPC spectra
|
|
+
|
|
+features=load_f32("../build_linux/all_8k.f32", nb_features);
|
|
+rateK=features(:, 1:nb_rateK);
|
|
+aks = features(:, nb_rateK+1:nb_rateK+nb_lpc);
|
|
+fs=fopen("../build_linux/all_8k_10ms.sw","rb");
|
|
+s = fread(fs,Inf,"short");
|
|
+fclose(fs);
|
|
+
|
|
+st_sec=14; en_sec=16;
|
|
+
|
|
+figure(1); clf; plot_against_time(s, st_sec, en_sec, Fs, 'b')
|
|
+figure(2); clf; mesh_against_time(rateK, st_sec, en_sec, Fsf);
|
|
+figure(3); clf; mesh_aks_against_time(aks, st_sec, en_sec, Fsf);
|
|
+
|
|
diff --git a/src/plot_pulaw.py b/src/plot_pulaw.py
|
|
new file mode 100755
|
|
index 0000000..10d5656
|
|
--- /dev/null
|
|
+++ b/src/plot_pulaw.py
|
|
@@ -0,0 +1,52 @@
|
|
+#!/usr/bin/python3
|
|
+# Utility to inspect packed ulaw samples from sw2packedulaw.c (or dump_data.c) before training
|
|
+
|
|
+import numpy as np
|
|
+import matplotlib.pyplot as plt
|
|
+import sys
|
|
+import ulaw
|
|
+import argparse
|
|
+
|
|
+parser = argparse.ArgumentParser(description='Plot LPCNet training packed ulaw samples')
|
|
+parser.add_argument('file1', help='pulaw file of packed ulaw samples')
|
|
+parser.add_argument('--file2', help='optional second packed ulaw file to compare')
|
|
+parser.add_argument('--nb_samples', type=int, default=-1, help='Optional number of samples to plot')
|
|
+args = parser.parse_args()
|
|
+
|
|
+data = np.fromfile(args.file1, dtype='uint8')
|
|
+nb_samples = args.nb_samples
|
|
+data = data[:nb_samples]
|
|
+
|
|
+sig = np.array(data[0::4], dtype='float')
|
|
+pred = np.array(data[1::4], dtype='float')
|
|
+in_exc = np.array(data[2::4], dtype='float')
|
|
+out_exc = np.array(data[3::4], dtype='float')
|
|
+
|
|
+print("exc var: %4.3e" % (np.var(ulaw.ulaw2lin(in_exc))))
|
|
+
|
|
+plt.figure(1)
|
|
+plt.subplot(211)
|
|
+plt.plot(ulaw.ulaw2lin(sig), label='sig')
|
|
+plt.ylim((-30000,30000))
|
|
+plt.legend()
|
|
+plt.subplot(212)
|
|
+plt.plot(ulaw.ulaw2lin(pred), label='pred')
|
|
+plt.ylim((-30000,30000))
|
|
+plt.legend()
|
|
+plt.show(block=False)
|
|
+
|
|
+plt.figure(2)
|
|
+plt.subplot(211)
|
|
+plt.plot(ulaw.ulaw2lin(in_exc), label='in_exc')
|
|
+if args.file2:
|
|
+ data2 = np.fromfile(args.file2, dtype='uint8')
|
|
+ data2 = data2[:nb_samples]
|
|
+ in_exc2 = np.array(data2[2::4], dtype='float')
|
|
+ plt.plot(ulaw.ulaw2lin(in_exc2), label='in_exc2')
|
|
+plt.ylim((-30000,30000))
|
|
+plt.legend()
|
|
+plt.subplot(212)
|
|
+plt.plot(ulaw.ulaw2lin(out_exc), label='out_exc')
|
|
+plt.ylim((-30000,30000))
|
|
+plt.legend()
|
|
+plt.show()
|
|
diff --git a/src/plot_train.py b/src/plot_train.py
|
|
index 910d7e9..7e2bc7b 100644
|
|
--- a/src/plot_train.py
|
|
+++ b/src/plot_train.py
|
|
@@ -3,11 +3,10 @@ import numpy as np
|
|
import sys
|
|
|
|
loss = np.loadtxt(sys.argv[1])
|
|
-delta_loss = (loss[1:,0]-loss[:-1,0])/loss[1:,0]
|
|
+delta_loss = (loss[1:]-loss[:-1])/loss[1:]
|
|
|
|
plt.figure(1)
|
|
-plt.plot(loss[:,0],'r')
|
|
-plt.plot(loss[:,1],'g')
|
|
+plt.plot(loss[:],'r')
|
|
plt.title('loss')
|
|
plt.show(block=False)
|
|
plt.figure(2)
|
|
diff --git a/src/plot_train.sh b/src/plot_train.sh
|
|
index 2a1fddf..3c86094 100755
|
|
--- a/src/plot_train.sh
|
|
+++ b/src/plot_train.sh
|
|
@@ -6,5 +6,5 @@
|
|
# plot graphs of loss and spares categorical accuracy to get a feel
|
|
# for progress while training
|
|
|
|
-grep loss $1 | sed -n 's/.*===\].*loss: \(.*\) - val_loss: \(.*\)/\1 \2/p' > loss.txt
|
|
-python3 plot_train.py loss.txt
|
|
+grep loss $1 | sed -n 's/.*===\].*step - loss: \(.*\)/\1/p' > loss.txt
|
|
+python3 ~/LPCNet/src/plot_train.py loss.txt
|
|
diff --git a/process.sh b/src/process.sh
|
|
similarity index 100%
|
|
rename from process.sh
|
|
rename to src/process.sh
|
|
diff --git a/src/sw2packedulaw.c b/src/sw2packedulaw.c
|
|
new file mode 100644
|
|
index 0000000..7724158
|
|
--- /dev/null
|
|
+++ b/src/sw2packedulaw.c
|
|
@@ -0,0 +1,188 @@
|
|
+/*
|
|
+ sw2packedulaw.c
|
|
+
|
|
+ Convert signed word samples to packed ulaw samples to drive LPCNet
|
|
+ training, this code is a cut/paste from dump_data.c with a few other
|
|
+ options.
|
|
+
|
|
+ By varying the LPC predictor coefficients we can try no predictor,
|
|
+ first order, and regular LPC.
|
|
+
|
|
+ 1. No prediction (WaveRNN I guess):
|
|
+ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32
|
|
+ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k_none.pulaw
|
|
+ $ ../src/plot_pulaw.py all_8k_none.pulaw
|
|
+
|
|
+ 2. First order predictor:
|
|
+ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32 --first
|
|
+ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k_first.pulaw
|
|
+
|
|
+ 3. LPC with ulaw Q in the loop and noise injection (standard LPCNet design):
|
|
+ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32 --lpc 10
|
|
+ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k.pulaw
|
|
+
|
|
+ 4. LPC with no Q in the loop or noise injection (linear):
|
|
+ $ ./src/sw2packedulaw --frame_size 80 --linear all_8k_10ms.sw all_8k.f32 all_8k_linear.pulaw
|
|
+
|
|
+ See plot_pulaw.py to inspect output .pulaw files
|
|
+*/
|
|
+
|
|
+#include <stdlib.h>
|
|
+#include <stdio.h>
|
|
+#include "common.h"
|
|
+#include <math.h>
|
|
+#include "freq.h"
|
|
+#include "pitch.h"
|
|
+#include "arch.h"
|
|
+#include "celt_lpc.h"
|
|
+#include <assert.h>
|
|
+#include <getopt.h>
|
|
+
|
|
+#define NB_FEATURES 55
|
|
+#define CODEC2_LPC_ORDER 10
|
|
+
|
|
+typedef struct {
|
|
+ float lpc[LPC_ORDER];
|
|
+ float sig_mem[LPC_ORDER];
|
|
+ int exc_mem;
|
|
+} DenoiseState;
|
|
+
|
|
+void write_audio(DenoiseState *st, const short *pcm, float noise_std, FILE *file, int frame_size) {
|
|
+ int i;
|
|
+ unsigned char data[4*frame_size];
|
|
+ for (i=0;i<frame_size;i++) {
|
|
+ int noise;
|
|
+ float p=0;
|
|
+ float e;
|
|
+ int j;
|
|
+ for (j=0;j<LPC_ORDER;j++) p -= st->lpc[j]*st->sig_mem[j];
|
|
+ e = lin2ulaw(pcm[i] - p);
|
|
+ /* Signal. */
|
|
+ data[4*i] = lin2ulaw(st->sig_mem[0]);
|
|
+ /* Prediction. */
|
|
+ data[4*i+1] = lin2ulaw(p);
|
|
+ /* Excitation in. */
|
|
+ data[4*i+2] = st->exc_mem;
|
|
+ /* Excitation out. */
|
|
+ data[4*i+3] = e;
|
|
+ /* Simulate error on excitation. */
|
|
+ noise = (int)floor(.5 + noise_std*.707*(log_approx((float)rand()/RAND_MAX)-log_approx((float)rand()/RAND_MAX)));
|
|
+ e += noise;
|
|
+ e = IMIN(255, IMAX(0, e));
|
|
+
|
|
+ RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
|
|
+ st->sig_mem[0] = p + ulaw2lin(e);
|
|
+ st->exc_mem = e;
|
|
+ }
|
|
+ fwrite(data, 4*frame_size, 1, file);
|
|
+}
|
|
+
|
|
+/* takes ulaw out of predictor path, and no noise injection */
|
|
+void write_audio_linear(DenoiseState *st, const short *pcm, FILE *file, int frame_size) {
|
|
+ int i;
|
|
+ unsigned char data[4*frame_size];
|
|
+ for (i=0;i<frame_size;i++) {
|
|
+ float p=0;
|
|
+ float e;
|
|
+ int j;
|
|
+ for (j=0;j<LPC_ORDER;j++) p -= st->lpc[j]*st->sig_mem[j];
|
|
+ e = pcm[i] - p;
|
|
+ //fprintf(stderr,"pcm: %d p: %f e: %f\n", pcm[i], p, e);
|
|
+ /* Signal. */
|
|
+ data[4*i] = lin2ulaw(st->sig_mem[0]);
|
|
+ /* Prediction. */
|
|
+ data[4*i+1] = lin2ulaw(p);
|
|
+ /* Excitation in. */
|
|
+ data[4*i+2] = st->exc_mem;
|
|
+ /* Excitation out. */
|
|
+ data[4*i+3] = lin2ulaw(e);
|
|
+
|
|
+ RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
|
|
+ st->sig_mem[0] = pcm[i];
|
|
+ st->exc_mem = lin2ulaw(e);
|
|
+ }
|
|
+ fwrite(data, 4*frame_size, 1, file);
|
|
+}
|
|
+
|
|
+int main(int argc, char *argv[]) {
|
|
+ int linear = 0;
|
|
+ int frame_size = FRAME_SIZE;
|
|
+
|
|
+ DenoiseState st;
|
|
+ memset(&st, 0, sizeof(DenoiseState));
|
|
+ st.exc_mem = 128;
|
|
+
|
|
+ int o = 0;
|
|
+ int opt_idx = 0;
|
|
+ while( o != -1 ) {
|
|
+ static struct option long_opts[] = {
|
|
+ {"linear", no_argument, 0, 'l'},
|
|
+ {"frame_size", required_argument, 0, 'f'},
|
|
+ {0, 0, 0, 0}
|
|
+ };
|
|
+
|
|
+ o = getopt_long(argc,argv,"l",long_opts,&opt_idx);
|
|
+
|
|
+ switch(o){
|
|
+ case 'f':
|
|
+ frame_size = atoi(optarg);
|
|
+ fprintf(stderr, "frame_size: %d\n", frame_size);
|
|
+ break;
|
|
+ case 'l':
|
|
+ linear = 1;
|
|
+ break;
|
|
+ case '?':
|
|
+ goto helpmsg;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ int dx = optind;
|
|
+
|
|
+ if ((argc - dx) < 3) {
|
|
+ helpmsg:
|
|
+ fprintf(stderr, "usage: s2packedulaw Input.s16 FeatureFile.f32 Output.pulaw\n");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ FILE *fsw = fopen(argv[dx], "rb");
|
|
+ if (fsw == NULL) {
|
|
+ fprintf(stderr, "Can't open %s\n", argv[dx]);
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ FILE *ffeature = fopen(argv[dx+1], "rb");
|
|
+ if (ffeature == NULL) {
|
|
+ fprintf(stderr, "Can't open %s\n", argv[dx+1]);
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ FILE *fpackedpcm = fopen(argv[dx+2], "wb");
|
|
+ if (fpackedpcm == NULL) {
|
|
+ fprintf(stderr, "Can't open %s\n", argv[dx+2]);
|
|
+ exit(1);
|
|
+ }
|
|
+
|
|
+ short frame[frame_size];
|
|
+ while (fread(frame, sizeof(short), frame_size, fsw) == (unsigned)frame_size) {
|
|
+ float features[NB_FEATURES];
|
|
+ int ret = fread(features, sizeof(float), NB_FEATURES, ffeature);
|
|
+ if (ret != NB_FEATURES) {
|
|
+ fprintf(stderr, "feature file ended early!\n");
|
|
+ exit(1);
|
|
+ }
|
|
+ for(int i=0; i<CODEC2_LPC_ORDER; i++) {
|
|
+ st.lpc[i] = features[18+i];
|
|
+ }
|
|
+ if (linear)
|
|
+ write_audio_linear(&st, frame, fpackedpcm, frame_size);
|
|
+ else {
|
|
+ write_audio(&st, frame, 0.5, fpackedpcm, frame_size);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ fclose(fsw);
|
|
+ fclose(ffeature);
|
|
+ fclose(fpackedpcm);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
diff --git a/src/test_lpcnet.c b/src/test_lpcnet.c
|
|
index 0a34729..e8c9907 100644
|
|
--- a/src/test_lpcnet.c
|
|
+++ b/src/test_lpcnet.c
|
|
@@ -36,26 +36,37 @@
|
|
int main(int argc, char **argv) {
|
|
FILE *fin, *fout;
|
|
LPCNetState *net;
|
|
- int logmag = 0;
|
|
-
|
|
+ int mag = 0;
|
|
+ int frame_size = FRAME_SIZE;
|
|
+
|
|
net = lpcnet_create();
|
|
|
|
int o = 0;
|
|
int opt_idx = 0;
|
|
while( o != -1 ) {
|
|
static struct option long_opts[] = {
|
|
- {"mag", no_argument, 0, 'i'},
|
|
- {"nnet", required_argument, 0, 'n'},
|
|
+ {"frame_size", required_argument, 0, 'f'},
|
|
{"logstates", required_argument, 0, 'l'},
|
|
- {0, 0, 0, 0}
|
|
+ {"mag", required_argument, 0, 'i'},
|
|
+ {"nnet", required_argument, 0, 'n'},
|
|
+ {"no_pitch_embedding", no_argument, 0, 'e'},
|
|
+ {"pre", required_argument, 0, 'p'},
|
|
+ {0, 0, 0, 0}
|
|
};
|
|
|
|
o = getopt_long(argc,argv,"ihn:l:",long_opts,&opt_idx);
|
|
|
|
switch(o){
|
|
+ case 'e':
|
|
+ lpcnet_set_pitch_embedding(net, 0);
|
|
+ break;
|
|
+ case 'f':
|
|
+ frame_size = atoi(optarg);
|
|
+ fprintf(stderr, "frame_size: %d\n", frame_size);
|
|
+ break;
|
|
case 'i':
|
|
- logmag = 1;
|
|
- fprintf(stderr, "logmag: %d\n", logmag);
|
|
+ mag = atoi(optarg);
|
|
+ fprintf(stderr, "mag: %d\n", mag);
|
|
break;
|
|
case 'l':
|
|
fprintf(stderr, "logstates file: %s\n", optarg);
|
|
@@ -65,6 +76,10 @@ int main(int argc, char **argv) {
|
|
fprintf(stderr, "loading nnet: %s\n", optarg);
|
|
nnet_read(optarg);
|
|
break;
|
|
+ case 'p':
|
|
+ if (atoi(optarg) == 0)
|
|
+ lpcnet_set_preemph(net, 0.0);
|
|
+ break;
|
|
case '?':
|
|
goto helpmsg;
|
|
break;
|
|
@@ -74,7 +89,9 @@ int main(int argc, char **argv) {
|
|
|
|
if ((argc - dx) < 2) {
|
|
helpmsg:
|
|
- fprintf(stderr, "usage: test_lpcnet [--mag] [--logstates statesfile] [--nnet lpcnet_xxx.f32] <features.f32> <output.pcm>\n");
|
|
+ fprintf(stderr, "usage: test_lpcnet [--mag 1|2] [--logstates statesfile] [--nnet lpcnet_xxx.f32]"
|
|
+ " [--framesize samples] [--pre 0|1] <features.f32> <output.s16>\n");
|
|
+ fprintf(stderr, "--mag -i 0-cepstrals, 1-logmag, 2-disable LPC (WaveRNN)\n");
|
|
return 0;
|
|
}
|
|
|
|
@@ -99,13 +116,12 @@ int main(int argc, char **argv) {
|
|
while (1) {
|
|
float in_features[NB_TOTAL_FEATURES];
|
|
float features[NB_FEATURES];
|
|
- short pcm[FRAME_SIZE];
|
|
+ short pcm[frame_size];
|
|
int nread = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
|
|
if (nread != NB_TOTAL_FEATURES) break;
|
|
RNN_COPY(features, in_features, NB_FEATURES);
|
|
- RNN_CLEAR(&features[18], 18);
|
|
- lpcnet_synthesize(net, pcm, features, FRAME_SIZE, logmag);
|
|
- fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
|
|
+ lpcnet_synthesize(net, pcm, features, frame_size, mag);
|
|
+ fwrite(pcm, sizeof(pcm[0]), frame_size, fout);
|
|
if (fout == stdout) fflush(stdout);
|
|
}
|
|
fclose(fin);
|
|
diff --git a/src/test_vec.c b/src/test_vec.c
|
|
index 09b51e7..efa617e 100644
|
|
--- a/src/test_vec.c
|
|
+++ b/src/test_vec.c
|
|
@@ -26,7 +26,10 @@ const char simd[]="AVX2";
|
|
#else
|
|
const char simd[]="AVX";
|
|
#endif
|
|
-#elif __ARM_NEON__
|
|
+#elif __SSE__
|
|
+#include "vec_sse.h"
|
|
+const char simd[]="SSE";
|
|
+#elif __ARM_NEON__ || __aarch64__
|
|
#include "vec_neon.h"
|
|
const char simd[]="NEON";
|
|
#else
|
|
diff --git a/src/thash.c b/src/thash.c
|
|
new file mode 100644
|
|
index 0000000..5b60f2e
|
|
--- /dev/null
|
|
+++ b/src/thash.c
|
|
@@ -0,0 +1,19 @@
|
|
+/*---------------------------------------------------------------------------*\
|
|
+
|
|
+ FILE........: thash.c
|
|
+ AUTHOR......: David Rowe
|
|
+ DATE CREATED: July 2020
|
|
+
|
|
+ Simple test program for LPCNet API get hash function
|
|
+
|
|
+\*---------------------------------------------------------------------------*/
|
|
+
|
|
+#include <stdio.h>
|
|
+#include "lpcnet_freedv.h"
|
|
+
|
|
+int main(void) {
|
|
+ printf("%s\n", lpcnet_get_hash());
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
diff --git a/train_direct.sh b/src/train_direct.sh
|
|
similarity index 100%
|
|
rename from train_direct.sh
|
|
rename to src/train_direct.sh
|
|
diff --git a/src/train_lpcnet.py b/src/train_lpcnet.py
|
|
index 62abbd7..94ab9a8 100755
|
|
--- a/src/train_lpcnet.py
|
|
+++ b/src/train_lpcnet.py
|
|
@@ -35,9 +35,14 @@ from keras.callbacks import ModelCheckpoint
|
|
from ulaw import ulaw2lin, lin2ulaw
|
|
import keras.backend as K
|
|
import h5py
|
|
-
|
|
+import argparse
|
|
+import os
|
|
import tensorflow as tf
|
|
from keras.backend.tensorflow_backend import set_session
|
|
+import matplotlib.pyplot as plt
|
|
+
|
|
+# less verbose tensorflow ....
|
|
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
|
config = tf.ConfigProto()
|
|
|
|
# use this option to reserve GPU memory, e.g. for running more than
|
|
@@ -46,23 +51,38 @@ config = tf.ConfigProto()
|
|
|
|
set_session(tf.Session(config=config))
|
|
|
|
-nb_epochs = 10
|
|
-
|
|
# Try reducing batch_size if you run out of memory on your GPU
|
|
batch_size = 32
|
|
+# width of feature records used for training
|
|
+nb_features = 55
|
|
+
|
|
+parser = argparse.ArgumentParser(description='LPCNet training')
|
|
+parser.add_argument('feature_file', help='.f32 file of float features')
|
|
+parser.add_argument('packed_ulaw_file', help='file of 4 multiplexed ulaw samples per speech sample')
|
|
+parser.add_argument('prefix', help='.h5 file prefix to easily identify each experiment')
|
|
+parser.add_argument('--frame_size', type=int, default=160, help='frames size in samples')
|
|
+parser.add_argument('--epochs', type=int, default=20, help='Number of training epochs')
|
|
+parser.add_argument('--no_pitch_embedding', action='store_true', help='disable pitch embedding')
|
|
+parser.add_argument('--load_h5', help='disable pitch embedding')
|
|
+args = parser.parse_args()
|
|
|
|
-model, _, _ = lpcnet.new_lpcnet_model(training=True)
|
|
+nb_epochs = args.epochs
|
|
+
|
|
+model, _, _ = lpcnet.new_lpcnet_model(frame_size=args.frame_size, training=True)
|
|
|
|
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
|
|
model.summary()
|
|
|
|
-feature_file = sys.argv[1]
|
|
-pcm_file = sys.argv[2] # 16 bit unsigned short PCM samples
|
|
-prefix = sys.argv[3] # prefix to put on .h5 files to easily name each experiment
|
|
+if args.load_h5:
|
|
+ print("loading: %s" % (args.load_h5))
|
|
+ model.load_weights(args.load_h5)
|
|
+
|
|
+feature_file = args.feature_file
|
|
+pcm_file = args.packed_ulaw_file
|
|
+prefix = args.prefix
|
|
frame_size = model.frame_size
|
|
-nb_features = 55
|
|
nb_used_features = model.nb_used_features
|
|
-feature_chunk_size = 15
|
|
+feature_chunk_size = 15 # time window for conv1d/receptive field
|
|
pcm_chunk_size = frame_size*feature_chunk_size
|
|
|
|
# u for unquantised, load 16 bit PCM samples and convert to mu-law
|
|
@@ -84,7 +104,17 @@ in_exc = np.reshape(data[2::4], (nb_frames, pcm_chunk_size, 1))
|
|
out_exc = np.reshape(data[3::4], (nb_frames, pcm_chunk_size, 1))
|
|
del data
|
|
|
|
-print("ulaw std = ", np.std(out_exc))
|
|
+"""
|
|
+# plot ulaw signals to sanity check
|
|
+testf=10
|
|
+print(sig.shape)
|
|
+#plt.plot(sig[testf,:],label="sig")
|
|
+#plt.plot(pred[testf,:],label="pred")
|
|
+plt.plot(in_exc[testf,:],label="in_exc")
|
|
+plt.plot(out_exc[testf,:],label="out_exc")
|
|
+plt.legend()
|
|
+plt.show()
|
|
+"""
|
|
|
|
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
|
|
features = features[:, :, :nb_used_features]
|
|
@@ -93,12 +123,34 @@ features = features[:, :, :nb_used_features]
|
|
# nb_used_features=38, so 0...37, so lpc-gain not used
|
|
features[:,:,18:36] = 0 # zero out 18..35, so pitch and pitch gain being fed in, lpc gain ignored
|
|
|
|
+"""
|
|
+# plot features to sanity check
|
|
+print(features.shape)
|
|
+testf=10
|
|
+plt.plot(features[testf,:,37:38])
|
|
+plt.show()
|
|
+"""
|
|
+
|
|
fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
|
|
fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)
|
|
features = np.concatenate([fpad1, features, fpad2], axis=1)
|
|
|
|
-# pitch feature uses as well as cesptrals
|
|
+# pitch feature uses as well as cepstrals
|
|
periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
|
|
+print(periods.shape)
|
|
+if args.no_pitch_embedding:
|
|
+ print("no_pitch_embedding")
|
|
+ periods[:] = 0
|
|
+# sanity check training data against pitch embedding range
|
|
+assert np.all(periods >= 40), "pitch embedding < 40"
|
|
+assert np.all(periods < 256), "pitch embeddeding > 255"
|
|
+
|
|
+"""
|
|
+# plot pitch to sanity check
|
|
+print(features.shape, periods.shape)
|
|
+plt.plot(periods.reshape(-1)[:1000])
|
|
+plt.show()
|
|
+"""
|
|
|
|
in_data = np.concatenate([sig, pred, in_exc], axis=-1)
|
|
|
|
@@ -108,9 +160,8 @@ del in_exc
|
|
|
|
# dump models to disk as we go
|
|
#checkpoint = ModelCheckpoint('lpcnet20h_384_10_G16_{epoch:02d}.h5')
|
|
-checkpoint = ModelCheckpoint(prefix + '_{epoch:02d}.h5')
|
|
+checkpoint = ModelCheckpoint(prefix + '_{epoch:d}.h5')
|
|
|
|
# use this to reload a partially trained model
|
|
-#model.load_weights('lpcnet_190203_07.h5')
|
|
model.compile(optimizer=Adam(0.001, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')
|
|
-model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.1, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))])
|
|
+model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))])
|
|
diff --git a/train_pred2.sh b/src/train_pred2.sh
|
|
similarity index 100%
|
|
rename from train_pred2.sh
|
|
rename to src/train_pred2.sh
|
|
diff --git a/src/vec_avx.h b/src/vec_avx.h
|
|
index 1e58f8d..520b5b2 100644
|
|
--- a/src/vec_avx.h
|
|
+++ b/src/vec_avx.h
|
|
@@ -79,7 +79,7 @@ static __m128 exp4_approx(__m128 X)
|
|
Y = _mm_castsi128_ps(_mm_and_si128(mask, _mm_add_epi32(I, _mm_castps_si128(Y))));
|
|
return Y;
|
|
}
|
|
-static __m256 exp8_approx(__m256 X)
|
|
+static inline __m256 exp8_approx(__m256 X)
|
|
{
|
|
__m256 Y;
|
|
__m128 Xhi, Xlo, Yhi, Ylo;
|
|
diff --git a/src/vec_sse.h b/src/vec_sse.h
|
|
new file mode 100644
|
|
index 0000000..82ddd42
|
|
--- /dev/null
|
|
+++ b/src/vec_sse.h
|
|
@@ -0,0 +1,211 @@
|
|
+/* Copyright (c) 2020 SASANO Takayoshi
|
|
+ 2018 David Rowe
|
|
+ 2018 Mozilla
|
|
+ 2008-2011 Octasic Inc.
|
|
+ 2012-2017 Jean-Marc Valin */
|
|
+/*
|
|
+ Redistribution and use in source and binary forms, with or without
|
|
+ modification, are permitted provided that the following conditions
|
|
+ are met:
|
|
+
|
|
+ - Redistributions of source code must retain the above copyright
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
+
|
|
+ - Redistributions in binary form must reproduce the above copyright
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
+ documentation and/or other materials provided with the distribution.
|
|
+
|
|
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
|
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+/*
|
|
+ SSE implementation of vector operations, compile with -msse
|
|
+ port from Arm NEON support
|
|
+*/
|
|
+
|
|
+#include <xmmintrin.h>
|
|
+
|
|
+#ifndef LPCNET_TEST
|
|
+static float celt_exp2(float x)
|
|
+{
|
|
+ int integer;
|
|
+ float frac;
|
|
+ union {
|
|
+ float f;
|
|
+ opus_uint32 i;
|
|
+ } res;
|
|
+ integer = floor(x);
|
|
+ if (integer < -50)
|
|
+ return 0;
|
|
+ frac = x-integer;
|
|
+ /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
|
|
+ res.f = 0.99992522f + frac * (0.69583354f
|
|
+ + frac * (0.22606716f + 0.078024523f*frac));
|
|
+ res.i = (res.i + (integer<<23)) & 0x7fffffff;
|
|
+ return res.f;
|
|
+}
|
|
+#define celt_exp_sse(x) celt_exp2((x)*1.44269504f)
|
|
+
|
|
+static float tansig_approx(float x)
|
|
+{
|
|
+ int i;
|
|
+ float y, dy;
|
|
+ float sign=1;
|
|
+ /* Tests are reversed to catch NaNs */
|
|
+ if (!(x<8))
|
|
+ return 1;
|
|
+ if (!(x>-8))
|
|
+ return -1;
|
|
+#ifndef FIXED_POINT
|
|
+ /* Another check in case of -ffast-math */
|
|
+ if (celt_isnan(x))
|
|
+ return 0;
|
|
+#endif
|
|
+ if (x<0)
|
|
+ {
|
|
+ x=-x;
|
|
+ sign=-1;
|
|
+ }
|
|
+ i = (int)floor(.5f+25*x);
|
|
+ x -= .04f*i;
|
|
+ y = tansig_table[i];
|
|
+ dy = 1-y*y;
|
|
+ y = y + x*dy*(1 - y*x);
|
|
+ return sign*y;
|
|
+}
|
|
+
|
|
+static OPUS_INLINE float sigmoid_approx(float x)
|
|
+{
|
|
+ return .5f + .5f*tansig_approx(.5f*x);
|
|
+}
|
|
+
|
|
+static void softmax(float *y, const float *x, int N)
|
|
+{
|
|
+ int i;
|
|
+ for (i=0;i<N;i++)
|
|
+ y[i] = celt_exp_sse(x[i]);
|
|
+}
|
|
+
|
|
+static void vec_tanh(float *y, const float *x, int N)
|
|
+{
|
|
+ int i;
|
|
+ for (i=0;i<N;i++)
|
|
+ {
|
|
+ y[i] = tansig_approx(x[i]);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void vec_sigmoid(float *y, const float *x, int N)
|
|
+{
|
|
+ int i;
|
|
+ for (i=0;i<N;i++)
|
|
+ {
|
|
+ y[i] = sigmoid_approx(x[i]);
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void sgemv_accum16(float *out, const float *weights, int rows, int cols, int col_stride, const float *x)
|
|
+{
|
|
+ int i, j;
|
|
+ for (i=0;i<rows;i+=16)
|
|
+ {
|
|
+ float * restrict y = &out[i];
|
|
+
|
|
+ /* keep y[0..15] in registers for duration of inner loop */
|
|
+
|
|
+ __m128 y0_3 = _mm_loadu_ps(&y[0]);
|
|
+ __m128 y4_7 = _mm_loadu_ps(&y[4]);
|
|
+ __m128 y8_11 = _mm_loadu_ps(&y[8]);
|
|
+ __m128 y12_15 = _mm_loadu_ps(&y[12]);
|
|
+
|
|
+ for (j=0;j<cols;j++)
|
|
+ {
|
|
+ const float * restrict w;
|
|
+ __m128 wvec0_3, wvec4_7, wvec8_11, wvec12_15;
|
|
+ __m128 xj = _mm_set1_ps(x[j]);
|
|
+
|
|
+ w = &weights[j*col_stride + i];
|
|
+
|
|
+ wvec0_3 = _mm_loadu_ps(&w[0]);
|
|
+ wvec4_7 = _mm_loadu_ps(&w[4]);
|
|
+ wvec8_11 = _mm_loadu_ps(&w[8]);
|
|
+ wvec12_15 = _mm_loadu_ps(&w[12]);
|
|
+
|
|
+ wvec0_3 = _mm_mul_ps(wvec0_3, xj);
|
|
+ wvec4_7 = _mm_mul_ps(wvec4_7, xj);
|
|
+ wvec8_11 = _mm_mul_ps(wvec8_11, xj);
|
|
+ wvec12_15 = _mm_mul_ps(wvec12_15, xj);
|
|
+
|
|
+ y0_3 = _mm_add_ps(y0_3, wvec0_3);
|
|
+ y4_7 = _mm_add_ps(y4_7, wvec4_7);
|
|
+ y8_11 = _mm_add_ps(y8_11, wvec8_11);
|
|
+ y12_15 = _mm_add_ps(y12_15, wvec12_15);
|
|
+ }
|
|
+
|
|
+ /* save y[0..15] back to memory */
|
|
+
|
|
+ _mm_storeu_ps(&y[0], y0_3);
|
|
+ _mm_storeu_ps(&y[4], y4_7);
|
|
+ _mm_storeu_ps(&y[8], y8_11);
|
|
+ _mm_storeu_ps(&y[12], y12_15);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int *idx, const float *x)
|
|
+{
|
|
+ int i, j;
|
|
+ for (i=0;i<rows;i+=16)
|
|
+ {
|
|
+ int cols;
|
|
+ cols = *idx++;
|
|
+ float * restrict y = &out[i];
|
|
+
|
|
+ /* keep y[0..15] in registers for duration of inner loop */
|
|
+
|
|
+ __m128 y0_3 = _mm_loadu_ps(&y[0]);
|
|
+ __m128 y4_7 = _mm_loadu_ps(&y[4]);
|
|
+ __m128 y8_11 = _mm_loadu_ps(&y[8]);
|
|
+ __m128 y12_15 = _mm_loadu_ps(&y[12]);
|
|
+
|
|
+ for (j=0;j<cols;j++)
|
|
+ {
|
|
+ __m128 wvec;
|
|
+ __m128 xj = _mm_set1_ps(x[*idx++]);
|
|
+
|
|
+ wvec = _mm_loadu_ps(&w[0]);
|
|
+ wvec = _mm_mul_ps(wvec, xj);
|
|
+ y0_3 = _mm_add_ps(y0_3, wvec);
|
|
+
|
|
+ wvec = _mm_loadu_ps(&w[4]);
|
|
+ wvec = _mm_mul_ps(wvec, xj);
|
|
+ y4_7 = _mm_add_ps(y4_7, wvec);
|
|
+
|
|
+ wvec = _mm_loadu_ps(&w[8]);
|
|
+ wvec = _mm_mul_ps(wvec, xj);
|
|
+ y8_11 = _mm_add_ps(y8_11, wvec);
|
|
+
|
|
+ wvec = _mm_loadu_ps(&w[12]);
|
|
+ wvec = _mm_mul_ps(wvec, xj);
|
|
+ y12_15 = _mm_add_ps(y12_15, wvec);
|
|
+
|
|
+ w += 16;
|
|
+ }
|
|
+
|
|
+ /* save y[0..15] back to memory */
|
|
+
|
|
+ _mm_storeu_ps(&y[0], y0_3);
|
|
+ _mm_storeu_ps(&y[4], y4_7);
|
|
+ _mm_storeu_ps(&y[8], y8_11);
|
|
+ _mm_storeu_ps(&y[12], y12_15);
|
|
+ }
|
|
+}
|
|
diff --git a/train_pred1.sh b/train_pred1.sh
|
|
deleted file mode 100755
|
|
index 3694252..0000000
|
|
--- a/train_pred1.sh
|
|
+++ /dev/null
|
|
@@ -1,31 +0,0 @@
|
|
-#!/bin/sh -x
|
|
-# train_pred2.sh
|
|
-# David Rowe Jan 2019
|
|
-# Train multi-stage VQ for LPCNet
|
|
-
|
|
-PATH=$PATH:/home/david/codec2-dev/build_linux/misc/
|
|
-
|
|
-if [ $# -lt 1 ]; then
|
|
- echo "usage: ./train_pred1.sh [-w] VQprefix"
|
|
- echo " $ ./train_pred1.sh pred1_v1"
|
|
- exit 1
|
|
-fi
|
|
-
|
|
-VQ_NAME=$1
|
|
-echo $VQ_NAME
|
|
-
|
|
-K=18
|
|
-STOP=1E-2
|
|
-
|
|
-echo "*********"
|
|
-echo "Pred 1"
|
|
-echo "*********"
|
|
-echo "weighting dctLy[0] ...."
|
|
-t=$(mktemp)
|
|
-extract all_speech_features.f32 $t 0 17 10 1.0 1
|
|
-cat $t | ./weight > $VQ_NAME'_s0.f32'
|
|
-vqtrain $VQ_NAME'_s0.f32' $K 2048 $VQ_NAME'_stage1.f32' -r $VQ_NAME'_s1.f32' -s $STOP
|
|
-vqtrain $VQ_NAME'_s1.f32' $K 2048 $VQ_NAME'_stage2.f32' -r $VQ_NAME'_s2.f32' -s $STOP
|
|
-vqtrain $VQ_NAME'_s2.f32' $K 2048 $VQ_NAME'_stage3.f32' -r $VQ_NAME'_s3.f32' -s $STOP
|
|
-vqtrain $VQ_NAME'_s3.f32' $K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s4.f32' -s $STOP
|
|
-
|
|
diff --git a/unittest/test_core_nn.sh b/unittest/test_core_nn.sh
|
|
index 392c897..cd955c7 100755
|
|
--- a/unittest/test_core_nn.sh
|
|
+++ b/unittest/test_core_nn.sh
|
|
@@ -1,4 +1,4 @@
|
|
-#!/bin/bash
|
|
+#!/bin/bash -x
|
|
# test_core_nn.sh
|
|
#
|
|
|
|
@@ -60,7 +60,7 @@ if [ ! -z $SYNTH_mag ]; then
|
|
../build_linux/src/dump_data --mag --test --c2pitch ../wav/c01_01.wav c01_01.f32
|
|
diff c01_01_mag.f32 c01_01.f32 || { echo "ERROR in synth .f32 output! Exiting..."; exit 1; }
|
|
echo "mag .f32 OK"
|
|
- ../build_linux/src/test_lpcnet --mag -n lpcnet_190804a.f32 c01_01.f32 c01_01_out.raw
|
|
+ ../build_linux/src/test_lpcnet --mag 1 -n lpcnet_190804a.f32 c01_01.f32 c01_01_out.raw
|
|
diff c01_01_190804a_targ.raw c01_01_out.raw || { echo "ERROR in synth .raw output! Exiting..."; exit 1; }
|
|
echo "mag .raw OK"
|
|
fi
|