diff --git a/lpcnetfreedv-private_libs.patch b/lpcnetfreedv-private_libs.patch deleted file mode 100644 index 4b79232..0000000 --- a/lpcnetfreedv-private_libs.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index 6c49f5e..a0b9f0a 100644 ---- a/src/CMakeLists.txt -+++ b/src/CMakeLists.txt -@@ -29,8 +29,8 @@ target_include_directories(lpcnetfreedv INTERFACE - $ - ) - install(TARGETS lpcnetfreedv EXPORT lpcnetfreedv-config -- LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} -- ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} -+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/lpcnetfreedv -+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/lpcnetfreedv - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/lpcnet - ) diff --git a/lpcnetfreedv-test.patch b/lpcnetfreedv-test.patch new file mode 100644 index 0000000..eb94af1 --- /dev/null +++ b/lpcnetfreedv-test.patch @@ -0,0 +1,1486 @@ +diff --git a/.travis.yml b/.travis.yml +index fb795aa..e92c398 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -29,7 +29,7 @@ script: + - cd src && sox ../../wav/wia.wav -t raw -r 16000 - | ./lpcnet_enc -s | ./lpcnet_dec -s > /dev/null + # some LPCNet ctests + - ls -l +- - cd $BUILDDIR && ctest ++ - cd $BUILDDIR && ctest --output-on-failure + # Re-build codec2 with LPCNet and test FreeDV 2020 support + - cd $CODEC2DIR/build_linux + - make clean +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 680f52c..1d5b623 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -10,6 +10,7 @@ project(LPCNet C) + option(DISABLE_CPU_OPTIMIZATION "Disable CPU optimization discovery." OFF) + option(AVX2 "Enable AVX2 CPU optimizations." OFF) + option(AVX "Enable AVX CPU optimizations." OFF) ++option(SSE "Enable SSE CPU optimizations." OFF) + option(NEON "Enable NEON CPU optimizations for RPi." OFF) + + include(GNUInstallDirs) +@@ -19,6 +20,11 @@ mark_as_advanced(CLEAR + CMAKE_INSTALL_LIBDIR + ) + ++# Build universal ARM64 and x86_64 binaries on Mac. ++if(BUILD_OSX_UNIVERSAL) ++set(CMAKE_OSX_ARCHITECTURES "x86_64;arm64") ++endif(BUILD_OSX_UNIVERSAL) ++ + # + # Prevent in-source builds + # If an in-source build is attempted, you will still need to clean up a few +@@ -43,15 +49,41 @@ set(LPCNET_VERSION_MINOR 2) + set(LPCNET_VERSION_PATCH FALSE) + set(LPCNET_VERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}") + # Patch level version bumps should not change API/ABI. +-set(SOVERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}") ++set(LPCNET_SOVERSION "${LPCNET_VERSION_MAJOR}.${LPCNET_VERSION_MINOR}") + if(LPCNET_VERSION_PATCH) + set(LPCNET_VERSION "${LPCNET_VERSION}.${LPCNET_VERSION_PATCH}") + endif() + message(STATUS "LPCNet version: ${LPCNET_VERSION}") + ++# ++# Find the git hash if this is a working copy. ++# ++if(EXISTS ${CMAKE_SOURCE_DIR}/.git) ++ find_package(Git QUIET) ++ if(Git_FOUND) ++ execute_process( ++ COMMAND "${GIT_EXECUTABLE}" describe --always HEAD ++ WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" ++ RESULT_VARIABLE res ++ OUTPUT_VARIABLE FREEDV_HASH ++ ERROR_QUIET ++ OUTPUT_STRIP_TRAILING_WHITESPACE) ++ message(STATUS "freedv-gui current git hash: ${FREEDV_HASH}") ++ add_definitions(-DGIT_HASH="${FREEDV_HASH}") ++ else() ++ message(WARNING "Git not found. Can not determine current commit hash.") ++ add_definitions(-DGIT_HASH="Unknown") ++ endif() ++else() ++ add_definitions(-DGIT_HASH="None") ++endif() ++ + # Set default flags + set(CMAKE_C_FLAGS "-Wall -W -Wextra -Wno-unused-function -O3 -g -I. -MD ${CMAKE_C_FLAGS} -DENABLE_ASSERTIONS") + ++# Arch specific stuff here ++message(STATUS "Host system arch is: ${CMAKE_SYSTEM_PROCESSOR}") ++ + # Detection of available CPU optimizations + if(NOT DISABLE_CPU_OPTIMIZATION) + if(UNIX AND NOT APPLE) +@@ -60,15 +92,25 @@ if(NOT DISABLE_CPU_OPTIMIZATION) + OUTPUT_VARIABLE AVX2) + execute_process(COMMAND grep -c "avx " /proc/cpuinfo + OUTPUT_VARIABLE AVX) ++ execute_process(COMMAND grep -c "sse4_1 " /proc/cpuinfo ++ OUTPUT_VARIABLE SSE) + execute_process(COMMAND grep -c "neon" /proc/cpuinfo + OUTPUT_VARIABLE NEON) + elseif(APPLE) +- # Under OSX we need to look through a few sysctl entries to determine what our CPU supports. +- message(STATUS "Looking for available CPU optimizations on an OSX system...") +- execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2 +- OUTPUT_VARIABLE AVX2) +- execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX +- OUTPUT_VARIABLE AVX) ++ if(BUILD_OSX_UNIVERSAL) ++ # Presume AVX/AVX2 are enabled on the x86 side. The ARM side will auto-enable ++ # NEON optimizations by virtue of being aarch64. ++ set(AVX TRUE) ++ set(AVX2 TRUE) ++ set(SSE TRUE) ++ else() ++ # Under OSX we need to look through a few sysctl entries to determine what our CPU supports. ++ message(STATUS "Looking for available CPU optimizations on an OSX system...") ++ execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.leaf7_features COMMAND grep -c AVX2 ++ OUTPUT_VARIABLE AVX2) ++ execute_process(COMMAND sysctl -a COMMAND grep machdep.cpu.features COMMAND grep -c AVX ++ OUTPUT_VARIABLE AVX) ++ endif(BUILD_OSX_UNIVERSAL) + elseif(WIN32) + message(STATUS "No detection capability on Windows, assuming AVX is available.") + set(AVX TRUE) +@@ -85,9 +127,13 @@ elseif(${AVX} OR ${AVX} GREATER 0) + # AVX2 machines will also match on AVX + message(STATUS "avx processor flags found or enabled.") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") ++elseif(${SSE} OR ${SSE} GREATER 0) ++# AVX and AVX2 machines will also match on SSE ++ message(STATUS "sse processor flags found or enabled.") ++ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1") + endif() + +-# RPi ++# RPi / ARM 32bit + if(${NEON} OR ${NEON} GREATER 0) + message(STATUS "neon processor flags found or enabled.") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=armv8-a -mtune=cortex-a53") +diff --git a/README.md b/README.md +index c446450..5b72d8c 100644 +--- a/README.md ++++ b/README.md +@@ -25,14 +25,22 @@ LPCNet at 1733 bits/s using direct-split quantiser: + ``` + sox ../../wav/wia.wav -t raw -r 16000 - | ./lpcnet_enc -s | ./lpcnet_dec -s | aplay -f S16_LE -r 16000 + ``` +-# CTests ++ ++## Manually Selecting SIMD Technology ++ ++Cmake will select the fastest SIMD available (AVX/SSSE/None), however you can manually select e.g.: ++``` ++make -DDISABLE_CPU_OPTIMIZATION=ON -DSSE=ON -DCODEC2_BUILD_DIR=~/codec2/build_linux .. ++``` ++ ++## CTests + + ``` + $ cd ~/LPCNet/build_linux + $ ctest + ``` + +-Note, due to precision/library issues several tests (1-3) will only pass on certain machines such as Ubuntu 16 and 18, Ubuntu 17 is known to fail. ++Note, due to precision/library issues several tests (1-3) will [only pass on some machines](https://github.com/drowe67/LPCNet/issues/17). + + # Reading Further + +diff --git a/src/700c_train.sh b/src/700c_train.sh +new file mode 100755 +index 0000000..3be057e +--- /dev/null ++++ b/src/700c_train.sh +@@ -0,0 +1,73 @@ ++#!/bin/bash -x ++# 700c_train.sh ++# David Rowe March 2020 ++# Experiments in LPCNet decoding of Codec 2 700C ++ ++PATH=$HOME/codec2/build_linux/src:$HOME/LPCNet/build_linux/src:$HOME/LPCNet/src:$PATH ++ ++if [ "$#" -ne 1 ]; then ++ echo "usage: ./700c_train.sh datestamp" ++ echo " ./700c_train.sh 200404" ++ exit 0 ++fi ++ ++train1=dev-clean-8k ++test1=test-clean-8k ++test2=all_speech_subset_8k ++test3=all_8k ++datestamp=$1 ++epochs=30 ++log=${1}.txt ++train=${datestamp}_train ++ ++# synth "c2sim arg for experiment" "experiment label" "filename" ++synth() { ++ test=$3 ++ c2sim ~/Downloads/${test}.sw --rateKWov ${test}.f32 ${1} ++ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test}.f32 ${datestamp}_${test}_${2}.sw ++} ++ ++# experient "c2sim arg for experiment" "experiment label" ++experiment() { ++ echo "------------------------------------------------------------------------------" ++ echo "train starting" ${2} ++ echo "------------------------------------------------------------------------------" ++ ++ c2sim ${train}.sw --ten_ms_centre ${train}_10ms.sw --rateKWov ${train}.f32 ${1} ++ sw2packedulaw --frame_size 80 ${train}_10ms.sw ${train}.f32 ${train}_10ms.pulaw ++ ++ train_lpcnet.py ${train}.f32 ${train}_10ms.pulaw ${datestamp}_${2} --epochs ${epochs} --frame_size 80 ++ ++ dump_lpcnet.py ${datestamp}_${2}_${epochs}.h5 ++ cp nnet_data.c src ++ make test_lpcnet ++ ++ synth "${1}" "${2}" "${test1}" ++ synth "${1}" "${2}" "${test2}" ++ synth "${1}" "${2}" "${test3}" ++} ++ ++rm -f $log ++ ++( ++ date ++ ++ # assemble some training speech ++ sox -r 8000 -c 1 ~/Downloads/${train1}.sw \ ++ -t sw -r 8000 -c 1 ${train}.sw ++ ++ # LPCNet with 10ms frames (similar to training data) ++ experiment "" "none" ++ ++ # Codec 2 700C at 40ms frame rate (700 bits/s) from c2dec ++ c2enc 700C ~/Downloads/${test1}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test1}_dec4.f32 ++ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test1}_dec4.f32 ${datestamp}_${test1}_40.sw ++ c2enc 700C ~/Downloads/${test2}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test2}_dec4.f32 ++ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test2}_dec4.f32 ${datestamp}_${test2}_40.sw ++ c2enc 700C ~/Downloads/${test3}.sw - --eq --var | c2dec 700C - /dev/null --mlfeat ${test3}_dec4.f32 ++ test_lpcnet --mag 2 --frame_size 80 --pre 0 ${test3}_dec4.f32 ${datestamp}_${test3}_40.sw ++ ++ date ++) |& tee $log ++ ++ +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 41a78dc..0df4672 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -23,6 +23,8 @@ add_library(lpcnetfreedv SHARED ${lpcnet_freedv_srcs}) + target_link_libraries(lpcnetfreedv codec2) + set_target_properties(lpcnetfreedv PROPERTIES + PUBLIC_HEADER lpcnet_freedv.h ++ VERSION ${LPCNET_VERSION} ++ SOVERSION ${LPCNET_SOVERSION} + ) + target_include_directories(lpcnetfreedv INTERFACE + $ +@@ -49,11 +51,11 @@ target_link_libraries(dump_data lpcnetfreedv m codec2) + add_executable(test_lpcnet test_lpcnet.c) + target_link_libraries(test_lpcnet lpcnetfreedv m codec2) + +-if(AVX OR AVX2) ++if(SSE OR AVX OR AVX2 OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + add_executable(test_vec test_vec.c) + target_link_libraries(test_vec m) + else() +- message(WARNING "No AVX/AVX2 CPU flags identified, not building test_vec.") ++ message(WARNING "No SSE/AVX/AVX2 CPU flags identified, not building test_vec.") + endif() + + add_executable(quant_feat quant_feat.c) +@@ -98,6 +100,12 @@ target_link_libraries(idct lpcnetfreedv m codec2) + add_executable(nnet2f32 nnet2f32.c) + target_link_libraries(nnet2f32 lpcnetfreedv m) + ++add_executable(sw2packedulaw sw2packedulaw.c) ++target_link_libraries(sw2packedulaw lpcnetfreedv m) ++ ++add_executable(thash thash.c) ++target_link_libraries(thash lpcnetfreedv m) ++ + install(TARGETS lpcnet_enc lpcnet_dec + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ) +diff --git a/src/codec2_pitch.c b/src/codec2_pitch.c +index a267785..55fb5bc 100644 +--- a/src/codec2_pitch.c ++++ b/src/codec2_pitch.c +@@ -113,6 +113,7 @@ int codec2_pitch_est(CODEC2_PITCH *pitch, float Sn[], float *f0, float *voicing) + + void codec2_pitch_destroy(CODEC2_PITCH *pitch) + { ++ free(pitch->fft_fwd_cfg); + nlp_destroy(pitch->nlp_states); + free(pitch->w); + free(pitch); +diff --git a/src/concat.sh b/src/concat.sh +old mode 100644 +new mode 100755 +index 8369117..d98ccda +--- a/src/concat.sh ++++ b/src/concat.sh +@@ -1,6 +1,8 @@ +-# Place in 16k-LP7 from TSPSpeech.iso and run to concatenate wave files +-# into one headerless training file +-for i in */*.wav ++#!/bin/bash ++# Concatenate .wav files into one headerless .sw training file ++# usage: ./concat.sh concatfile.sw ++ ++for i in `find . -name '*.wav'` + do + sox $i -r 16000 -c 1 -t sw - +-done > input.s16 ++done > $1 +diff --git a/src/dump_data.c b/src/dump_data.c +index cd936cf..4e8d3c4 100644 +--- a/src/dump_data.c ++++ b/src/dump_data.c +@@ -453,6 +453,7 @@ int main(int argc, char **argv) { + assert(pitch_index < 2*PITCH_MAX_PERIOD); + assert(pitch_index >= 2*PITCH_MIN_PERIOD); + features[2*NB_BANDS] = 0.01*(pitch_index-200); ++ //fprintf(stderr, "count: %d [36] %f pitch_index: %d\n", count, features[36], pitch_index); + if (c2voicing_en) features[2*NB_BANDS+1] = voicing; + } + fwrite(features, sizeof(float), NB_FEATURES, ffeat); +diff --git a/ext_pitch.sh b/src/ext_pitch.sh +similarity index 100% +rename from ext_pitch.sh +rename to src/ext_pitch.sh +diff --git a/src/flac_to_wav.sh b/src/flac_to_wav.sh +new file mode 100755 +index 0000000..8f8aa29 +--- /dev/null ++++ b/src/flac_to_wav.sh +@@ -0,0 +1,10 @@ ++#!/bin/bash ++# Convert all .flac files under this folder to .wav files ++# source: several GitHub repos ++ ++find . -iname "*.flac" | wc ++ ++for flacfile in `find . -iname "*.flac"` ++do ++ ffmpeg -y -f flac -i $flacfile -ab 64k -ac 1 -ar 16000 -f wav "${flacfile%.*}.wav" ++done +diff --git a/src/freq.c b/src/freq.c +index c88d071..dbe94d9 100644 +--- a/src/freq.c ++++ b/src/freq.c +@@ -140,6 +140,13 @@ static void check_init() { + common.init = 1; + } + ++void freq_close() { ++ if (common.init) { ++ opus_fft_free(common.kfft,0); ++ common.init = 0; ++ } ++} ++ + void dct(float *out, const float *in) { + int i; + check_init(); +diff --git a/src/freq.h b/src/freq.h +index 0316edd..314eabd 100644 +--- a/src/freq.h ++++ b/src/freq.h +@@ -42,6 +42,7 @@ + + #define NB_BANDS 18 + ++void freq_close(void); + void compute_band_energy(float *bandE, const kiss_fft_cpx *X); + void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P); + +diff --git a/src/lpcnet.c b/src/lpcnet.c +index e117f1c..9f3f059 100644 +--- a/src/lpcnet.c ++++ b/src/lpcnet.c +@@ -54,8 +54,10 @@ struct LPCNetState { + float old_lpc[FEATURES_DELAY][LPC_ORDER]; + float old_gain[FEATURES_DELAY]; + int frame_count; ++ float preemph; + float deemph_mem; +- FILE *ftest; /* used to dump states for automates tests */ ++ int pitch_embedding; ++ FILE *ftest; /* used to dump states for automated tests */ + }; + + +@@ -118,6 +120,8 @@ LPCNetState *lpcnet_create() + lpcnet = (LPCNetState *)calloc(sizeof(LPCNetState), 1); + lpcnet->last_exc = 128; + lpcnet->ftest = NULL; ++ lpcnet->preemph = PREEMPH; ++ lpcnet->pitch_embedding = 1; + return lpcnet; + } + +@@ -135,7 +139,15 @@ void lpcnet_open_test_file(LPCNetState *lpcnet, char file_name[]) { + } + } + +-void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features, int N, int logmag) ++void lpcnet_set_preemph(LPCNetState *lpcnet, float preemph) { ++ lpcnet->preemph = preemph; ++} ++ ++void lpcnet_set_pitch_embedding(LPCNetState *lpcnet, int val) { ++ lpcnet->pitch_embedding = val; ++} ++ ++void lpcnet_synthesize(LPCNetState *lpcnet, short *output, float *features, int N, int mag) + { + static int count = 0; + int i; +@@ -149,13 +161,19 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features + static int start = 0; /*(LPC_ORDER+1*/; + /* FIXME: Do proper rounding once the Python code rounds properly. */ + +- pitch = (int)floor(.1 + 50*features[36]+100); +- assert(pitch >=0); assert(pitch <= 255); +- /* latest networks (using the codec 2 pitch estimator) are trained +- with pitch estimates between 40 and 255, but due to the pitch +- quantiser design and bit errors it's possible to get pitch +- values down to 32, which upsets the pitch embed matrix */ +- if (pitch < 40) pitch = 40; ++ if (lpcnet->pitch_embedding) { ++ pitch = (int)floor(.1 + 50*features[36]+100); ++ //fprintf(stderr, "count: %d [36] %f pitch: %d\n", lpcnet->frame_count, features[36], pitch); ++ assert(pitch >=0); assert(pitch <= 255); ++ /* latest networks (using the codec 2 pitch estimator) are trained ++ with pitch estimates between 40 and 255, but due to the pitch ++ quantiser design and bit errors it's possible to get pitch ++ values down to 32, which upsets the pitch embed matrix */ ++ if (pitch < 40) pitch = 40; ++ } ++ else { ++ pitch = 0; ++ } + + pitch_gain = lpcnet->old_gain[FEATURES_DELAY-1]; + memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0])); +@@ -164,13 +182,30 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features + memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0])); + memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0])); + +- if (logmag) { +- float tmp[NB_BANDS]; ++ switch (mag) { ++ case 0: ++ lpc_from_cepstrum(lpcnet->old_lpc[0], features); ++ break; ++ case 1: ++ { ++ float tmp[NB_BANDS]; + for (i=0;iold_lpc[0], tmp); + } +- else +- lpc_from_cepstrum(lpcnet->old_lpc[0], features); ++ break; ++ case 2: ++ for (i=0;iold_lpc[0][i] = features[i+NB_BANDS]; ++ } ++ break; ++ default: ++ assert(0); ++ } ++ ++ /* We optinally use this part of feature vector to pass in LPCs, ++ * but we don't want any non zero values here hitting the ++ * frame rate network. TODO: better design */ ++ RNN_CLEAR(&features[18], 18); + + if (lpcnet->ftest) { + float pitch_f = pitch; +@@ -220,7 +255,7 @@ void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features + RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1); + lpcnet->last_sig[0] = pcm; + lpcnet->last_exc = exc; +- pcm += PREEMPH*lpcnet->deemph_mem; ++ pcm += lpcnet->preemph*lpcnet->deemph_mem; + lpcnet->deemph_mem = pcm; + if (pcm<-32767) pcm = -32767; + if (pcm>32767) pcm = 32767; +diff --git a/src/lpcnet.h b/src/lpcnet.h +index 70e849e..bd98a37 100644 +--- a/src/lpcnet.h ++++ b/src/lpcnet.h +@@ -34,8 +34,10 @@ + typedef struct LPCNetState LPCNetState; + LPCNetState *lpcnet_create(); + void lpcnet_destroy(LPCNetState *lpcnet); +-void lpcnet_synthesize(LPCNetState *lpcnet, short *output, const float *features, int N, int logmag); ++void lpcnet_synthesize(LPCNetState *lpcnet, short *output, float *features, int N, int logmag); + + void lpcnet_open_test_file(LPCNetState *lpcnet, char file_name[]); ++void lpcnet_set_preemph(LPCNetState *lpcnet, float preemph); ++void lpcnet_set_pitch_embedding(LPCNetState *lpcnet, int val); + + #endif +diff --git a/src/lpcnet.py b/src/lpcnet.py +index 010f478..960e8c8 100644 +--- a/src/lpcnet.py ++++ b/src/lpcnet.py +@@ -36,7 +36,6 @@ import numpy as np + import h5py + import sys + +-frame_size = 160 + pcm_bits = 8 + embed_size = 128 + pcm_levels = 2**pcm_bits +@@ -113,7 +112,7 @@ class PCMInit(Initializer): + 'seed': self.seed + } + +-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, use_gpu=True): ++def new_lpcnet_model(frame_size = 160, rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, use_gpu=True): + pcm = Input(shape=(None, 3)) + feat = Input(shape=(None, nb_used_features)) + pitch = Input(shape=(None, 1)) +diff --git a/src/lpcnet_dump.c b/src/lpcnet_dump.c +index d8a8409..58f9c98 100644 +--- a/src/lpcnet_dump.c ++++ b/src/lpcnet_dump.c +@@ -87,7 +87,8 @@ static DenoiseState *rnnoise_create() { + } + + static void rnnoise_destroy(DenoiseState *st) { +- free(st); ++ freq_close(); ++ free(st); + } + + static short float2short(float x) +diff --git a/src/lpcnet_freedv.c b/src/lpcnet_freedv.c +index 823fcdc..fe154ea 100644 +--- a/src/lpcnet_freedv.c ++++ b/src/lpcnet_freedv.c +@@ -80,3 +80,9 @@ void lpcnet_dec(LPCNetFreeDV *lf, char *frame, short* pcm) + + int lpcnet_samples_per_frame(LPCNetFreeDV *lf) { return FRAME_SIZE*lf->q->dec; } + int lpcnet_bits_per_frame(LPCNetFreeDV *lf) { return lf->q->bits_per_frame; } ++ ++static char git_hash[] = GIT_HASH; ++char *lpcnet_get_hash(void) { ++ return git_hash; ++} ++ +diff --git a/src/lpcnet_freedv.h b/src/lpcnet_freedv.h +index 43c8298..874f7cc 100644 +--- a/src/lpcnet_freedv.h ++++ b/src/lpcnet_freedv.h +@@ -8,6 +8,10 @@ + #ifndef __LPCNET_FREEDV__ + #define __LPCNET_FREEDV__ + ++#ifdef __cplusplus ++ extern "C" { ++#endif ++ + typedef struct LPCNetFreeDV LPCNetFreeDV; + + LPCNetFreeDV* lpcnet_freedv_create(int direct_split); +@@ -16,5 +20,10 @@ void lpcnet_enc(LPCNetFreeDV *lf, short *pcm, char *frame); + void lpcnet_dec(LPCNetFreeDV *lf, char *frame, short* pcm); + int lpcnet_bits_per_frame(LPCNetFreeDV *lf); + int lpcnet_samples_per_frame(LPCNetFreeDV *lf); ++char *lpcnet_get_hash(void); ++ ++#ifdef __cplusplus ++} ++#endif + + #endif +diff --git a/src/nnet.c b/src/nnet.c +index 8ad4a26..1da7d70 100644 +--- a/src/nnet.c ++++ b/src/nnet.c +@@ -43,7 +43,9 @@ + + #ifdef __AVX__ + #include "vec_avx.h" +-#elif __ARM_NEON__ ++#elif __SSE__ ++#include "vec_sse.h" ++#elif __ARM_NEON__ || __aarch64__ + #include "vec_neon.h" + #else + #warning Compiling without any vectorization. This code will be very slow +diff --git a/src/plot_lpc.m b/src/plot_lpc.m +new file mode 100644 +index 0000000..3b814be +--- /dev/null ++++ b/src/plot_lpc.m +@@ -0,0 +1,50 @@ ++% plot_lpc.m ++% David Rowe April 2020 ++% ++% Visualise LPC spectra for 700C decoder experiments ++ ++Fs = 8000; % speech sample rate ++Fsf = 100; % frame sample rate ++nb_features = 55; ++nb_rateK = 18; % number of rateK (log amplitude) features ++nb_lpc = 10; % number of LPCs ++ ++function plot_against_time(v, st_sec, en_sec, Fs, leg='b') ++ st = Fs*st_sec; en = Fs*en_sec; ++ t = st_sec:1/Fs:en_sec; ++ plot(t,v(st+1:en+1),leg); ++endfunction ++ ++function mesh_against_time(m, st_sec, en_sec, Fs) ++ st = Fs*st_sec; en = Fs*en_sec; ++ t = st_sec:1/Fs:en_sec; ++ mesh(m(st+1:en+1,:)); ++endfunction ++ ++function mesh_aks_against_time(aks, st_sec, en_sec, Fs) ++ st = Fs*st_sec; en = Fs*en_sec; ++ t = st_sec:1/Fs:en_sec; ++ aks = aks(st+1:en+1,:); A = []; ++ for f=1:length(aks) ++ A = [A freqz(1,[1 aks(f,:)],64)]; ++ end ++ AdB = 20*log10(abs(A)); ++ max(AdB(:)) ++ mesh(AdB); ++endfunction ++ ++# plots of speech (input), rateK vectors, LPC spectra ++ ++features=load_f32("../build_linux/all_8k.f32", nb_features); ++rateK=features(:, 1:nb_rateK); ++aks = features(:, nb_rateK+1:nb_rateK+nb_lpc); ++fs=fopen("../build_linux/all_8k_10ms.sw","rb"); ++s = fread(fs,Inf,"short"); ++fclose(fs); ++ ++st_sec=14; en_sec=16; ++ ++figure(1); clf; plot_against_time(s, st_sec, en_sec, Fs, 'b') ++figure(2); clf; mesh_against_time(rateK, st_sec, en_sec, Fsf); ++figure(3); clf; mesh_aks_against_time(aks, st_sec, en_sec, Fsf); ++ +diff --git a/src/plot_pulaw.py b/src/plot_pulaw.py +new file mode 100755 +index 0000000..10d5656 +--- /dev/null ++++ b/src/plot_pulaw.py +@@ -0,0 +1,52 @@ ++#!/usr/bin/python3 ++# Utility to inspect packed ulaw samples from sw2packedulaw.c (or dump_data.c) before training ++ ++import numpy as np ++import matplotlib.pyplot as plt ++import sys ++import ulaw ++import argparse ++ ++parser = argparse.ArgumentParser(description='Plot LPCNet training packed ulaw samples') ++parser.add_argument('file1', help='pulaw file of packed ulaw samples') ++parser.add_argument('--file2', help='optional second packed ulaw file to compare') ++parser.add_argument('--nb_samples', type=int, default=-1, help='Optional number of samples to plot') ++args = parser.parse_args() ++ ++data = np.fromfile(args.file1, dtype='uint8') ++nb_samples = args.nb_samples ++data = data[:nb_samples] ++ ++sig = np.array(data[0::4], dtype='float') ++pred = np.array(data[1::4], dtype='float') ++in_exc = np.array(data[2::4], dtype='float') ++out_exc = np.array(data[3::4], dtype='float') ++ ++print("exc var: %4.3e" % (np.var(ulaw.ulaw2lin(in_exc)))) ++ ++plt.figure(1) ++plt.subplot(211) ++plt.plot(ulaw.ulaw2lin(sig), label='sig') ++plt.ylim((-30000,30000)) ++plt.legend() ++plt.subplot(212) ++plt.plot(ulaw.ulaw2lin(pred), label='pred') ++plt.ylim((-30000,30000)) ++plt.legend() ++plt.show(block=False) ++ ++plt.figure(2) ++plt.subplot(211) ++plt.plot(ulaw.ulaw2lin(in_exc), label='in_exc') ++if args.file2: ++ data2 = np.fromfile(args.file2, dtype='uint8') ++ data2 = data2[:nb_samples] ++ in_exc2 = np.array(data2[2::4], dtype='float') ++ plt.plot(ulaw.ulaw2lin(in_exc2), label='in_exc2') ++plt.ylim((-30000,30000)) ++plt.legend() ++plt.subplot(212) ++plt.plot(ulaw.ulaw2lin(out_exc), label='out_exc') ++plt.ylim((-30000,30000)) ++plt.legend() ++plt.show() +diff --git a/src/plot_train.py b/src/plot_train.py +index 910d7e9..7e2bc7b 100644 +--- a/src/plot_train.py ++++ b/src/plot_train.py +@@ -3,11 +3,10 @@ import numpy as np + import sys + + loss = np.loadtxt(sys.argv[1]) +-delta_loss = (loss[1:,0]-loss[:-1,0])/loss[1:,0] ++delta_loss = (loss[1:]-loss[:-1])/loss[1:] + + plt.figure(1) +-plt.plot(loss[:,0],'r') +-plt.plot(loss[:,1],'g') ++plt.plot(loss[:],'r') + plt.title('loss') + plt.show(block=False) + plt.figure(2) +diff --git a/src/plot_train.sh b/src/plot_train.sh +index 2a1fddf..3c86094 100755 +--- a/src/plot_train.sh ++++ b/src/plot_train.sh +@@ -6,5 +6,5 @@ + # plot graphs of loss and spares categorical accuracy to get a feel + # for progress while training + +-grep loss $1 | sed -n 's/.*===\].*loss: \(.*\) - val_loss: \(.*\)/\1 \2/p' > loss.txt +-python3 plot_train.py loss.txt ++grep loss $1 | sed -n 's/.*===\].*step - loss: \(.*\)/\1/p' > loss.txt ++python3 ~/LPCNet/src/plot_train.py loss.txt +diff --git a/process.sh b/src/process.sh +similarity index 100% +rename from process.sh +rename to src/process.sh +diff --git a/src/sw2packedulaw.c b/src/sw2packedulaw.c +new file mode 100644 +index 0000000..7724158 +--- /dev/null ++++ b/src/sw2packedulaw.c +@@ -0,0 +1,188 @@ ++/* ++ sw2packedulaw.c ++ ++ Convert signed word samples to packed ulaw samples to drive LPCNet ++ training, this code is a cut/paste from dump_data.c witha few other ++ options. ++ ++ By varying the LPC predictor coefficients we can try no predictor, ++ first order, and regular LPC. ++ ++ 1. No prediction (WaveRNN I guess): ++ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32 ++ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k_none.pulaw ++ $ ../src/plot_pulaw.py all_8k_none.pulaw ++ ++ 2. First order predictor: ++ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32 --first ++ $ ./src/sw2packedulaw --frame_size 80 all_8k_10ms.sw all_8k.f32 all_8k_first.pulaw ++ ++ 3. LPC with ulaw Q in the loop and noise injection (standard LPCNet design): ++ $ ~/codec2/build_linux/src/c2sim ~/Downloads/all_8k.sw --ten_ms_centre all_8k_10ms.sw --rateKWov all_8k.f32 --lpc 10 ++ $ ./src/sw2packedulaw --frame_size 80all_8k_10ms.sw all_8k.f32 all_8k.pulaw ++ ++ 4. LPC with no Q in the loop or noise injection (linear): ++ $ ./src/sw2packedulaw --frame_size 80 --linear all_8k_10ms.sw all_8k.f32 all_8k_linear.pulaw ++ ++ See plot_pulaw.py to inspect output .pulaw files ++*/ ++ ++#include ++#include ++#include "common.h" ++#include ++#include "freq.h" ++#include "pitch.h" ++#include "arch.h" ++#include "celt_lpc.h" ++#include ++#include ++ ++#define NB_FEATURES 55 ++#define CODEC2_LPC_ORDER 10 ++ ++typedef struct { ++ float lpc[LPC_ORDER]; ++ float sig_mem[LPC_ORDER]; ++ int exc_mem; ++} DenoiseState; ++ ++void write_audio(DenoiseState *st, const short *pcm, float noise_std, FILE *file, int frame_size) { ++ int i; ++ unsigned char data[4*frame_size]; ++ for (i=0;ilpc[j]*st->sig_mem[j]; ++ e = lin2ulaw(pcm[i] - p); ++ /* Signal. */ ++ data[4*i] = lin2ulaw(st->sig_mem[0]); ++ /* Prediction. */ ++ data[4*i+1] = lin2ulaw(p); ++ /* Excitation in. */ ++ data[4*i+2] = st->exc_mem; ++ /* Excitation out. */ ++ data[4*i+3] = e; ++ /* Simulate error on excitation. */ ++ noise = (int)floor(.5 + noise_std*.707*(log_approx((float)rand()/RAND_MAX)-log_approx((float)rand()/RAND_MAX))); ++ e += noise; ++ e = IMIN(255, IMAX(0, e)); ++ ++ RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1); ++ st->sig_mem[0] = p + ulaw2lin(e); ++ st->exc_mem = e; ++ } ++ fwrite(data, 4*frame_size, 1, file); ++} ++ ++/* takes ulaw out of predictor path, and no noise injection */ ++void write_audio_linear(DenoiseState *st, const short *pcm, FILE *file, int frame_size) { ++ int i; ++ unsigned char data[4*frame_size]; ++ for (i=0;ilpc[j]*st->sig_mem[j]; ++ e = pcm[i] - p; ++ //fprintf(stderr,"pcm: %d p: %f e: %f\n", pcm[i], p, e); ++ /* Signal. */ ++ data[4*i] = lin2ulaw(st->sig_mem[0]); ++ /* Prediction. */ ++ data[4*i+1] = lin2ulaw(p); ++ /* Excitation in. */ ++ data[4*i+2] = st->exc_mem; ++ /* Excitation out. */ ++ data[4*i+3] = lin2ulaw(e); ++ ++ RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1); ++ st->sig_mem[0] = pcm[i]; ++ st->exc_mem = lin2ulaw(e); ++ } ++ fwrite(data, 4*frame_size, 1, file); ++} ++ ++int main(int argc, char *argv[]) { ++ int linear = 0; ++ int frame_size = FRAME_SIZE; ++ ++ DenoiseState st; ++ memset(&st, 0, sizeof(DenoiseState)); ++ st.exc_mem = 128; ++ ++ int o = 0; ++ int opt_idx = 0; ++ while( o != -1 ) { ++ static struct option long_opts[] = { ++ {"linear", no_argument, 0, 'l'}, ++ {"frame_size", required_argument, 0, 'f'}, ++ {0, 0, 0, 0} ++ }; ++ ++ o = getopt_long(argc,argv,"l",long_opts,&opt_idx); ++ ++ switch(o){ ++ case 'f': ++ frame_size = atoi(optarg); ++ fprintf(stderr, "frame_size: %d\n", frame_size); ++ break; ++ case 'l': ++ linear = 1; ++ break; ++ case '?': ++ goto helpmsg; ++ break; ++ } ++ } ++ int dx = optind; ++ ++ if ((argc - dx) < 3) { ++ helpmsg: ++ fprintf(stderr, "usage: s2packedulaw Input.s16 FeatureFile.f32 Output.pulaw\n"); ++ return 0; ++ } ++ ++ FILE *fsw = fopen(argv[dx], "rb"); ++ if (fsw == NULL) { ++ fprintf(stderr, "Can't open %s\n", argv[dx]); ++ exit(1); ++ } ++ ++ FILE *ffeature = fopen(argv[dx+1], "rb"); ++ if (ffeature == NULL) { ++ fprintf(stderr, "Can't open %s\n", argv[dx+1]); ++ exit(1); ++ } ++ ++ FILE *fpackedpcm = fopen(argv[dx+2], "wb"); ++ if (fpackedpcm == NULL) { ++ fprintf(stderr, "Can't open %s\n", argv[dx+2]); ++ exit(1); ++ } ++ ++ short frame[frame_size]; ++ while (fread(frame, sizeof(short), frame_size, fsw) == (unsigned)frame_size) { ++ float features[NB_FEATURES]; ++ int ret = fread(features, sizeof(float), NB_FEATURES, ffeature); ++ if (ret != NB_FEATURES) { ++ fprintf(stderr, "feature file ended early!\n"); ++ exit(1); ++ } ++ for(int i=0; i \n"); ++ fprintf(stderr, "usage: test_lpcnet [--mag 1|2] [--logstates statesfile] [--nnet lpcnet_xxx.f32]" ++ " [--framesize samples] [--pre 0|1] \n"); ++ fprintf(stderr, "--mag -i 0-cepstrals, 1-logmag, 2-disable LPC (WaveRNN)\n"); + return 0; + } + +@@ -99,13 +116,12 @@ int main(int argc, char **argv) { + while (1) { + float in_features[NB_TOTAL_FEATURES]; + float features[NB_FEATURES]; +- short pcm[FRAME_SIZE]; ++ short pcm[frame_size]; + int nread = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin); + if (nread != NB_TOTAL_FEATURES) break; + RNN_COPY(features, in_features, NB_FEATURES); +- RNN_CLEAR(&features[18], 18); +- lpcnet_synthesize(net, pcm, features, FRAME_SIZE, logmag); +- fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout); ++ lpcnet_synthesize(net, pcm, features, frame_size, mag); ++ fwrite(pcm, sizeof(pcm[0]), frame_size, fout); + if (fout == stdout) fflush(stdout); + } + fclose(fin); +diff --git a/src/test_vec.c b/src/test_vec.c +index 09b51e7..efa617e 100644 +--- a/src/test_vec.c ++++ b/src/test_vec.c +@@ -26,7 +26,10 @@ const char simd[]="AVX2"; + #else + const char simd[]="AVX"; + #endif +-#elif __ARM_NEON__ ++#elif __SSE__ ++#include "vec_sse.h" ++const char simd[]="SSE"; ++#elif __ARM_NEON__ || __aarch64__ + #include "vec_neon.h" + const char simd[]="NEON"; + #else +diff --git a/src/thash.c b/src/thash.c +new file mode 100644 +index 0000000..5b60f2e +--- /dev/null ++++ b/src/thash.c +@@ -0,0 +1,19 @@ ++/*---------------------------------------------------------------------------*\ ++ ++ FILE........: thash.c ++ AUTHOR......: David Rowe ++ DATE CREATED: July 2020 ++ ++ Simple test program for LPCNet API get hash function ++ ++\*---------------------------------------------------------------------------*/ ++ ++#include ++#include "lpcnet_freedv.h" ++ ++int main(void) { ++ printf("%s\n", lpcnet_get_hash()); ++ return 0; ++} ++ ++ +diff --git a/train_direct.sh b/src/train_direct.sh +similarity index 100% +rename from train_direct.sh +rename to src/train_direct.sh +diff --git a/src/train_lpcnet.py b/src/train_lpcnet.py +index 62abbd7..94ab9a8 100755 +--- a/src/train_lpcnet.py ++++ b/src/train_lpcnet.py +@@ -35,9 +35,14 @@ from keras.callbacks import ModelCheckpoint + from ulaw import ulaw2lin, lin2ulaw + import keras.backend as K + import h5py +- ++import argparse ++import os + import tensorflow as tf + from keras.backend.tensorflow_backend import set_session ++import matplotlib.pyplot as plt ++ ++# less verbose tensorflow .... ++os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + config = tf.ConfigProto() + + # use this option to reserve GPU memory, e.g. for running more than +@@ -46,23 +51,38 @@ config = tf.ConfigProto() + + set_session(tf.Session(config=config)) + +-nb_epochs = 10 +- + # Try reducing batch_size if you run out of memory on your GPU + batch_size = 32 ++# with of feature records used for training ++nb_features = 55 ++ ++parser = argparse.ArgumentParser(description='LPCNet training') ++parser.add_argument('feature_file', help='.f32 file of float features') ++parser.add_argument('packed_ulaw_file', help='file of 4 multiplexed ulaw samples per speech sample') ++parser.add_argument('prefix', help='.h5 file prefix to easily identify each experiment') ++parser.add_argument('--frame_size', type=int, default=160, help='frames size in samples') ++parser.add_argument('--epochs', type=int, default=20, help='Number of training epochs') ++parser.add_argument('--no_pitch_embedding', action='store_true', help='disable pitch embedding') ++parser.add_argument('--load_h5', help='disable pitch embedding') ++args = parser.parse_args() + +-model, _, _ = lpcnet.new_lpcnet_model(training=True) ++nb_epochs = args.epochs ++ ++model, _, _ = lpcnet.new_lpcnet_model(frame_size=args.frame_size, training=True) + + model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) + model.summary() + +-feature_file = sys.argv[1] +-pcm_file = sys.argv[2] # 16 bit unsigned short PCM samples +-prefix = sys.argv[3] # prefix to put on .h5 files to easily name each experiment ++if args.load_h5: ++ print("loading: %s" % (args.load_h5)) ++ model.load_weights(args.load_h5) ++ ++feature_file = args.feature_file ++pcm_file = args.packed_ulaw_file ++prefix = args.prefix + frame_size = model.frame_size +-nb_features = 55 + nb_used_features = model.nb_used_features +-feature_chunk_size = 15 ++feature_chunk_size = 15 # time window for conv1d/receptive field + pcm_chunk_size = frame_size*feature_chunk_size + + # u for unquantised, load 16 bit PCM samples and convert to mu-law +@@ -84,7 +104,17 @@ in_exc = np.reshape(data[2::4], (nb_frames, pcm_chunk_size, 1)) + out_exc = np.reshape(data[3::4], (nb_frames, pcm_chunk_size, 1)) + del data + +-print("ulaw std = ", np.std(out_exc)) ++""" ++# plot ulaw signals to sanity check ++testf=10 ++print(sig.shape) ++#plt.plot(sig[testf,:],label="sig") ++#plt.plot(pred[testf,:],label="pred") ++plt.plot(in_exc[testf,:],label="in_exc") ++plt.plot(out_exc[testf,:],label="out_exc") ++plt.legend() ++plt.show() ++""" + + features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features)) + features = features[:, :, :nb_used_features] +@@ -93,12 +123,34 @@ features = features[:, :, :nb_used_features] + # nb_used_features=38, so 0...37, so lpc-gain not used + features[:,:,18:36] = 0 # zero out 18..35, so pitch and pitch gain being fed in, lpc gain ignored + ++""" ++# plot features to sanity check ++print(features.shape) ++testf=10 ++plt.plot(features[testf,:,37:38]) ++plt.show() ++""" ++ + fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0) + fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0) + features = np.concatenate([fpad1, features, fpad2], axis=1) + +-# pitch feature uses as well as cesptrals ++# pitch feature uses as well as cepstrals + periods = (.1 + 50*features[:,:,36:37]+100).astype('int16') ++print(periods.shape) ++if args.no_pitch_embedding: ++ print("no_pitch_embedding") ++ periods[:] = 0 ++# sanity check training data aginst pitch embedding range ++assert np.all(periods >= 40), "pitch embedding < 40" ++assert np.all(periods < 256), "pitch embeddeding > 255" ++ ++""" ++# plot pitch to sanity check ++print(features.shape, periods.shape) ++plt.plot(periods.reshape(-1)[:1000]) ++plt.show() ++""" + + in_data = np.concatenate([sig, pred, in_exc], axis=-1) + +@@ -108,9 +160,8 @@ del in_exc + + # dump models to disk as we go + #checkpoint = ModelCheckpoint('lpcnet20h_384_10_G16_{epoch:02d}.h5') +-checkpoint = ModelCheckpoint(prefix + '_{epoch:02d}.h5') ++checkpoint = ModelCheckpoint(prefix + '_{epoch:d}.h5') + + # use this to reload a partially trained model +-#model.load_weights('lpcnet_190203_07.h5') + model.compile(optimizer=Adam(0.001, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy') +-model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.1, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))]) ++model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, callbacks=[checkpoint, lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))]) +diff --git a/train_pred2.sh b/src/train_pred2.sh +similarity index 100% +rename from train_pred2.sh +rename to src/train_pred2.sh +diff --git a/src/vec_avx.h b/src/vec_avx.h +index 1e58f8d..520b5b2 100644 +--- a/src/vec_avx.h ++++ b/src/vec_avx.h +@@ -79,7 +79,7 @@ static __m128 exp4_approx(__m128 X) + Y = _mm_castsi128_ps(_mm_and_si128(mask, _mm_add_epi32(I, _mm_castps_si128(Y)))); + return Y; + } +-static __m256 exp8_approx(__m256 X) ++static inline __m256 exp8_approx(__m256 X) + { + __m256 Y; + __m128 Xhi, Xlo, Yhi, Ylo; +diff --git a/src/vec_sse.h b/src/vec_sse.h +new file mode 100644 +index 0000000..82ddd42 +--- /dev/null ++++ b/src/vec_sse.h +@@ -0,0 +1,211 @@ ++/* Copyright (c) 2020 SASANO Takayoshi ++ 2018 David Rowe ++ 2018 Mozilla ++ 2008-2011 Octasic Inc. ++ 2012-2017 Jean-Marc Valin */ ++/* ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ ++ - Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ ++ - Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ ++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR ++ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ++ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++*/ ++/* ++ SSE implementation of vector operations, compile with -msse ++ port from Arm NEON support ++*/ ++ ++#include ++ ++#ifndef LPCNET_TEST ++static float celt_exp2(float x) ++{ ++ int integer; ++ float frac; ++ union { ++ float f; ++ opus_uint32 i; ++ } res; ++ integer = floor(x); ++ if (integer < -50) ++ return 0; ++ frac = x-integer; ++ /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */ ++ res.f = 0.99992522f + frac * (0.69583354f ++ + frac * (0.22606716f + 0.078024523f*frac)); ++ res.i = (res.i + (integer<<23)) & 0x7fffffff; ++ return res.f; ++} ++#define celt_exp_sse(x) celt_exp2((x)*1.44269504f) ++ ++static float tansig_approx(float x) ++{ ++ int i; ++ float y, dy; ++ float sign=1; ++ /* Tests are reversed to catch NaNs */ ++ if (!(x<8)) ++ return 1; ++ if (!(x>-8)) ++ return -1; ++#ifndef FIXED_POINT ++ /* Another check in case of -ffast-math */ ++ if (celt_isnan(x)) ++ return 0; ++#endif ++ if (x<0) ++ { ++ x=-x; ++ sign=-1; ++ } ++ i = (int)floor(.5f+25*x); ++ x -= .04f*i; ++ y = tansig_table[i]; ++ dy = 1-y*y; ++ y = y + x*dy*(1 - y*x); ++ return sign*y; ++} ++ ++static OPUS_INLINE float sigmoid_approx(float x) ++{ ++ return .5f + .5f*tansig_approx(.5f*x); ++} ++ ++static void softmax(float *y, const float *x, int N) ++{ ++ int i; ++ for (i=0;i $VQ_NAME'_s0.f32' +-vqtrain $VQ_NAME'_s0.f32' $K 2048 $VQ_NAME'_stage1.f32' -r $VQ_NAME'_s1.f32' -s $STOP +-vqtrain $VQ_NAME'_s1.f32' $K 2048 $VQ_NAME'_stage2.f32' -r $VQ_NAME'_s2.f32' -s $STOP +-vqtrain $VQ_NAME'_s2.f32' $K 2048 $VQ_NAME'_stage3.f32' -r $VQ_NAME'_s3.f32' -s $STOP +-vqtrain $VQ_NAME'_s3.f32' $K 2048 $VQ_NAME'_stage4.f32' -r $VQ_NAME'_s4.f32' -s $STOP +- +diff --git a/unittest/test_core_nn.sh b/unittest/test_core_nn.sh +index 392c897..cd955c7 100755 +--- a/unittest/test_core_nn.sh ++++ b/unittest/test_core_nn.sh +@@ -1,4 +1,4 @@ +-#!/bin/bash ++#!/bin/bash -x + # test_core_nn.sh + # + +@@ -60,7 +60,7 @@ if [ ! -z $SYNTH_mag ]; then + ../build_linux/src/dump_data --mag --test --c2pitch ../wav/c01_01.wav c01_01.f32 + diff c01_01_mag.f32 c01_01.f32 || { echo "ERROR in synth .f32 output! Exiting..."; exit 1; } + echo "mag .f32 OK" +- ../build_linux/src/test_lpcnet --mag -n lpcnet_190804a.f32 c01_01.f32 c01_01_out.raw ++ ../build_linux/src/test_lpcnet --mag 1 -n lpcnet_190804a.f32 c01_01.f32 c01_01_out.raw + diff c01_01_190804a_targ.raw c01_01_out.raw || { echo "ERROR in synth .raw output! Exiting..."; exit 1; } + echo "mag .raw OK" + fi diff --git a/lpcnetfreedv-vector-updates.patch b/lpcnetfreedv-vector-updates.patch deleted file mode 100644 index 6000514..0000000 --- a/lpcnetfreedv-vector-updates.patch +++ /dev/null @@ -1,62 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 680f52c..e536f30 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -52,6 +52,9 @@ message(STATUS "LPCNet version: ${LPCNET_VERSION}") - # Set default flags - set(CMAKE_C_FLAGS "-Wall -W -Wextra -Wno-unused-function -O3 -g -I. -MD ${CMAKE_C_FLAGS} -DENABLE_ASSERTIONS") - -+# Arch specific stuff here -+message(STATUS "Host system arch is: ${CMAKE_SYSTEM_PROCESSOR}") -+ - # Detection of available CPU optimizations - if(NOT DISABLE_CPU_OPTIMIZATION) - if(UNIX AND NOT APPLE) -@@ -87,7 +90,7 @@ elseif(${AVX} OR ${AVX} GREATER 0) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") - endif() - --# RPi -+# RPi / ARM 32bit - if(${NEON} OR ${NEON} GREATER 0) - message(STATUS "neon processor flags found or enabled.") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=armv8-a -mtune=cortex-a53") -diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index 41a78dc..6c49f5e 100644 ---- a/src/CMakeLists.txt -+++ b/src/CMakeLists.txt -@@ -49,7 +49,7 @@ target_link_libraries(dump_data lpcnetfreedv m codec2) - add_executable(test_lpcnet test_lpcnet.c) - target_link_libraries(test_lpcnet lpcnetfreedv m codec2) - --if(AVX OR AVX2) -+if(AVX OR AVX2 OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") - add_executable(test_vec test_vec.c) - target_link_libraries(test_vec m) - else() -diff --git a/src/nnet.c b/src/nnet.c -index 8ad4a26..ccb9c94 100644 ---- a/src/nnet.c -+++ b/src/nnet.c -@@ -43,7 +43,7 @@ - - #ifdef __AVX__ - #include "vec_avx.h" --#elif __ARM_NEON__ -+#elif __ARM_NEON__ || __aarch64__ - #include "vec_neon.h" - #else - #warning Compiling without any vectorization. This code will be very slow -diff --git a/src/test_vec.c b/src/test_vec.c -index 09b51e7..254292b 100644 ---- a/src/test_vec.c -+++ b/src/test_vec.c -@@ -26,7 +26,7 @@ const char simd[]="AVX2"; - #else - const char simd[]="AVX"; - #endif --#elif __ARM_NEON__ -+#elif __ARM_NEON__ || __aarch64__ - #include "vec_neon.h" - const char simd[]="NEON"; - #else diff --git a/lpcnetfreedv.spec b/lpcnetfreedv.spec index 07b763b..6de645a 100644 --- a/lpcnetfreedv.spec +++ b/lpcnetfreedv.spec @@ -1,6 +1,9 @@ +%undefine __cmake_in_source_build +%global sover 0.2 + Name: lpcnetfreedv Version: 0.2 -Release: 4%{?dist} +Release: 5%{?dist} Summary: LPCNet for FreeDV License: BSD @@ -8,10 +11,7 @@ URL: https://github.com/drowe67/LPCNet Source0: https://github.com/drowe67/LPCNet/archive/v%{version}/LPCNet-%{version}.tar.gz Source1: http://rowetel.com/downloads/deep/lpcnet_191005_v1.0.tgz -# Fixes for aarch64 which has NEON instructions natively -Patch0: lpcnetfreedv-vector-updates.patch -# Make library private for FreeDV -Patch1: lpcnetfreedv-private_libs.patch +Patch0: lpcnetfreedv-test.patch BuildRequires: cmake gcc BuildRequires: codec2-devel @@ -37,7 +37,7 @@ Summary: Development files and tools for LPCNet %build # Add model data archive to the build directory so CMake finds it. mkdir -p %{_vpath_builddir} -cp %{SOURCE1} %{_vpath_builddir}/ +cp %{SOURCE1} %{__cmake_builddir}/ # We need to force optimizations to specific values since the build system and # host system will likely be different. @@ -63,15 +63,19 @@ cp %{SOURCE1} %{_vpath_builddir}/ %files %license COPYING %doc README.md -%{_libdir}/%{name}/lib%{name}.so +%{_libdir}/lib%{name}.so.%{sover} %files devel %{_bindir}/* %{_includedir}/lpcnet/ %{_libdir}/cmake/lpcnetfreedv/ +%{_libdir}/lib%{name}.so %changelog +* Sun Dec 20 2020 Richard Shaw - 0.2-5 +- Change library install location to %%{_libdir}. + * Sat Aug 01 2020 Fedora Release Engineering - 0.2-4 - Second attempt - Rebuilt for https://fedoraproject.org/wiki/Fedora_33_Mass_Rebuild