diff --git a/babl/babl-icc.c b/babl/babl-icc.c index 38e382a..25f36a9 100644 --- a/babl/babl-icc.c +++ b/babl/babl-icc.c @@ -474,10 +474,9 @@ babl_trc_from_icc (ICC *state, } ret = babl_trc_lut_find (lut, count); - if (ret) - return ret; + if (ret == NULL) + ret = babl_trc_lut (NULL, count, lut); - ret = babl_trc_lut (NULL, count, lut); babl_free (lut); return ret; } diff --git a/babl/base/meson.build b/babl/base/meson.build index e59609c..1084d8e 100644 --- a/babl/base/meson.build +++ b/babl/base/meson.build @@ -21,7 +21,7 @@ babl_base_sources = [ babl_base = static_library('babl_base', babl_base_sources, include_directories: [rootInclude, bablInclude], - dependencies: [math, lcms], + dependencies: [math, lcms, log], c_args: common_c_flags + [sse2_cflags] ) @@ -30,14 +30,14 @@ if host_cpu_family == 'x86_64' babl_base_x86_64_v2 = static_library('babl_base-x86-64-v2', babl_base_sources, include_directories: [rootInclude, bablInclude], - dependencies: [math, lcms], + dependencies: [math, lcms, log], c_args: common_c_flags + x86_64_v2_flags ) babl_base_x86_64_v3 = static_library('babl_base-x86-64-v3', babl_base_sources, include_directories: [rootInclude, bablInclude], - dependencies: [math, lcms], + dependencies: [math, lcms, log], c_args: common_c_flags + x86_64_v3_flags ) @@ -49,7 +49,7 @@ if host_cpu_family == 'arm' babl_base_arm_neon = static_library('babl_base-arm-neon', babl_base_sources, include_directories: [rootInclude, bablInclude], - dependencies: [math, lcms], + dependencies: [math, lcms, log], c_args: common_c_flags + arm_neon_flags ) diff --git a/babl/meson.build b/babl/meson.build index 013384e..b98df1a 100644 --- a/babl/meson.build +++ b/babl/meson.build @@ -118,7 +118,7 @@ if meson.is_subproject() subdir('babl') endif -babl_deps = [math, thread, dl, lcms] +babl_deps = [math, thread, dl, lcms, log] babl_includes = [rootInclude, bablBaseInclude] if host_cpu_family == 'x86_64' diff --git a/docs/meson.build b/docs/meson.build index bd006b7..97b76ac 100644 
--- a/docs/meson.build +++ b/docs/meson.build @@ -40,7 +40,7 @@ endif TOC = files('toc') html_files = { - 'index': [index_static_html, [ + 'index': ['index-static.html.in', [ ['AUTHORS', authors_file], ['TODO', todo_file], ['NEWS', news_file], diff --git a/extensions/CIE.c b/extensions/CIE.c index 1d7b0ca..5b367d4 100644 --- a/extensions/CIE.c +++ b/extensions/CIE.c @@ -2441,10 +2441,10 @@ rgbaf_to_Lf_sse2 (const Babl *conversion, } static void -rgbaf_to_Labaf_sse2 (const Babl *conversion, - const float *src, - float *dst, - long samples) +rgbaf_to_Labaf_sse2_aligned_4mult (const Babl *conversion, + const float *src, + float *dst, + long samples) { const Babl *space = babl_conversion_get_source_space (conversion); const float m_0_0 = space->space.RGBtoXYZf[0] / D50_WHITE_REF_Xf; @@ -2456,100 +2456,124 @@ rgbaf_to_Labaf_sse2 (const Babl *conversion, const float m_2_0 = space->space.RGBtoXYZf[6] / D50_WHITE_REF_Zf; const float m_2_1 = space->space.RGBtoXYZf[7] / D50_WHITE_REF_Zf; const float m_2_2 = space->space.RGBtoXYZf[8] / D50_WHITE_REF_Zf; - long i = 0; - long remainder; + long i = 0; + + const __m128 m_0_0_v = _mm_set1_ps (m_0_0); + const __m128 m_0_1_v = _mm_set1_ps (m_0_1); + const __m128 m_0_2_v = _mm_set1_ps (m_0_2); + const __m128 m_1_0_v = _mm_set1_ps (m_1_0); + const __m128 m_1_1_v = _mm_set1_ps (m_1_1); + const __m128 m_1_2_v = _mm_set1_ps (m_1_2); + const __m128 m_2_0_v = _mm_set1_ps (m_2_0); + const __m128 m_2_1_v = _mm_set1_ps (m_2_1); + const __m128 m_2_2_v = _mm_set1_ps (m_2_2); + + assert (((uintptr_t) src % 16) + ((uintptr_t) dst % 16) == 0); + assert (samples % 4 == 0); + + for ( ; i < samples; i += 4) + { + __m128 Laba0; + __m128 Laba1; + __m128 Laba2; + __m128 Laba3; + + __m128 rgba0 = _mm_load_ps (src); + __m128 rgba1 = _mm_load_ps (src + 4); + __m128 rgba2 = _mm_load_ps (src + 8); + __m128 rgba3 = _mm_load_ps (src + 12); + + __m128 r = rgba0; + __m128 g = rgba1; + __m128 b = rgba2; + __m128 a = rgba3; + _MM_TRANSPOSE4_PS (r, g, b, a); - if 
(((uintptr_t) src % 16) + ((uintptr_t) dst % 16) == 0) - { - const long n = (samples / 4) * 4; - const __m128 m_0_0_v = _mm_set1_ps (m_0_0); - const __m128 m_0_1_v = _mm_set1_ps (m_0_1); - const __m128 m_0_2_v = _mm_set1_ps (m_0_2); - const __m128 m_1_0_v = _mm_set1_ps (m_1_0); - const __m128 m_1_1_v = _mm_set1_ps (m_1_1); - const __m128 m_1_2_v = _mm_set1_ps (m_1_2); - const __m128 m_2_0_v = _mm_set1_ps (m_2_0); - const __m128 m_2_1_v = _mm_set1_ps (m_2_1); - const __m128 m_2_2_v = _mm_set1_ps (m_2_2); - - for ( ; i < n; i += 4) { - __m128 Laba0; - __m128 Laba1; - __m128 Laba2; - __m128 Laba3; + __m128 xr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_0_0_v, r), _mm_mul_ps (m_0_1_v, g)), + _mm_mul_ps (m_0_2_v, b)); + __m128 yr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_1_0_v, r), _mm_mul_ps (m_1_1_v, g)), + _mm_mul_ps (m_1_2_v, b)); + __m128 zr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_2_0_v, r), _mm_mul_ps (m_2_1_v, g)), + _mm_mul_ps (m_2_2_v, b)); - __m128 rgba0 = _mm_load_ps (src); - __m128 rgba1 = _mm_load_ps (src + 4); - __m128 rgba2 = _mm_load_ps (src + 8); - __m128 rgba3 = _mm_load_ps (src + 12); + __m128 fx = lab_r_to_f_sse2 (xr); + __m128 fy = lab_r_to_f_sse2 (yr); + __m128 fz = lab_r_to_f_sse2 (zr); - __m128 r = rgba0; - __m128 g = rgba1; - __m128 b = rgba2; - __m128 a = rgba3; - _MM_TRANSPOSE4_PS (r, g, b, a); + __m128 L = _mm_sub_ps (_mm_mul_ps (_mm_set1_ps (116.0f), fy), _mm_set1_ps (16.0f)); + __m128 A = _mm_mul_ps (_mm_set1_ps (500.0f), _mm_sub_ps (fx, fy)); + __m128 B = _mm_mul_ps (_mm_set1_ps (200.0f), _mm_sub_ps (fy, fz)); + + Laba0 = L; + Laba1 = A; + Laba2 = B; + Laba3 = a; + _MM_TRANSPOSE4_PS (Laba0, Laba1, Laba2, Laba3); + } - { - __m128 xr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_0_0_v, r), _mm_mul_ps (m_0_1_v, g)), - _mm_mul_ps (m_0_2_v, b)); - __m128 yr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_1_0_v, r), _mm_mul_ps (m_1_1_v, g)), - _mm_mul_ps (m_1_2_v, b)); - __m128 zr = _mm_add_ps (_mm_add_ps (_mm_mul_ps (m_2_0_v, r), _mm_mul_ps (m_2_1_v, g)), - 
_mm_mul_ps (m_2_2_v, b)); + _mm_store_ps (dst, Laba0); + _mm_store_ps (dst + 4, Laba1); + _mm_store_ps (dst + 8, Laba2); + _mm_store_ps (dst + 12, Laba3); - __m128 fx = lab_r_to_f_sse2 (xr); - __m128 fy = lab_r_to_f_sse2 (yr); - __m128 fz = lab_r_to_f_sse2 (zr); + src += 16; + dst += 16; + } +} - __m128 L = _mm_sub_ps (_mm_mul_ps (_mm_set1_ps (116.0f), fy), _mm_set1_ps (16.0f)); - __m128 A = _mm_mul_ps (_mm_set1_ps (500.0f), _mm_sub_ps (fx, fy)); - __m128 B = _mm_mul_ps (_mm_set1_ps (200.0f), _mm_sub_ps (fy, fz)); - - Laba0 = L; - Laba1 = A; - Laba2 = B; - Laba3 = a; - _MM_TRANSPOSE4_PS (Laba0, Laba1, Laba2, Laba3); - } +static void +rgbaf_to_Labaf_sse2 (const Babl *conversion, + const float *src, + float *dst, + long samples) +{ + if (((uintptr_t) src % 16) + ((uintptr_t) dst % 16) == 0 || + samples < 4) + { + long first_samples = samples / 4 * 4; + long remainder; - _mm_store_ps (dst, Laba0); - _mm_store_ps (dst + 4, Laba1); - _mm_store_ps (dst + 8, Laba2); - _mm_store_ps (dst + 12, Laba3); + rgbaf_to_Labaf_sse2_aligned_4mult (conversion, src, dst, first_samples); + remainder = samples - first_samples; - src += 16; - dst += 16; + if (remainder) + { + float __attribute__ ((aligned (16))) aligned_src[16]; + float __attribute__ ((aligned (16))) aligned_dest[16]; + + memcpy (aligned_src, src + first_samples * 4, remainder * 16); + memset (aligned_src + remainder * 4, 0, 4 * 16 - (remainder * 16)); + rgbaf_to_Labaf_sse2_aligned_4mult (conversion, (const float *) aligned_src, aligned_dest, 4); + memcpy (dst + first_samples * 4, aligned_dest, remainder * 16); } } - - remainder = samples - i; - while (remainder--) + else { - float r = src[0]; - float g = src[1]; - float b = src[2]; - float a = src[3]; - - float xr = m_0_0 * r + m_0_1 * g + m_0_2 * b; - float yr = m_1_0 * r + m_1_1 * g + m_1_2 * b; - float zr = m_2_0 * r + m_2_1 * g + m_2_2 * b; + float __attribute__ ((aligned (16))) _aligned_src[4 * samples]; + float __attribute__ ((aligned (16))) _aligned_dst[4 * 
samples]; + float *aligned_src; + float *aligned_dst; - float fx = xr > LAB_EPSILONf ? _cbrtf (xr) : (LAB_KAPPAf * xr + 16.0f) / 116.0f; - float fy = yr > LAB_EPSILONf ? _cbrtf (yr) : (LAB_KAPPAf * yr + 16.0f) / 116.0f; - float fz = zr > LAB_EPSILONf ? _cbrtf (zr) : (LAB_KAPPAf * zr + 16.0f) / 116.0f; + if (((uintptr_t) src % 16) != 0) + { + aligned_src = _aligned_src; + memcpy (aligned_src, src, samples * 16); + } + else + { + aligned_src = (float *) src; + } - float L = 116.0f * fy - 16.0f; - float A = 500.0f * (fx - fy); - float B = 200.0f * (fy - fz); + if (((uintptr_t) dst % 16) != 0) + aligned_dst = _aligned_dst; + else + aligned_dst = dst; - dst[0] = L; - dst[1] = A; - dst[2] = B; - dst[3] = a; + rgbaf_to_Labaf_sse2 (conversion, aligned_src, aligned_dst, samples); - src += 4; - dst += 4; + if (((uintptr_t) dst % 16) != 0) + memcpy (dst, aligned_dst, samples * 16); } } diff --git a/extensions/HCY.c b/extensions/HCY.c index 31cb8b4..46d7a99 100644 --- a/extensions/HCY.c +++ b/extensions/HCY.c @@ -86,7 +86,7 @@ components (void) babl_component_new ("hue", NULL); babl_component_new ("HCY chroma", "chroma", NULL); babl_component_new ("HCY luma", "luma", NULL); - babl_component_new ("alpha", NULL); + babl_component_new ("alpha", "alpha", NULL); } static void diff --git a/extensions/HSL.c b/extensions/HSL.c index 73b2f03..2c8ae9f 100644 --- a/extensions/HSL.c +++ b/extensions/HSL.c @@ -69,7 +69,7 @@ init (void) babl_component_new ("hue", NULL); babl_component_new ("saturation", NULL); babl_component_new ("lightness", NULL); - babl_component_new ("alpha", NULL); + babl_component_new ("alpha", "alpha", NULL); babl_model_new ("name", "HSL", "doc", "HSL - Hue Saturation Lightness, an improvement over HSV; which uses lightness; defined as (MAX(R,G,B) + MIN(R,G,B))/2 for the grayscale axis; better than HSV, but look into the CIE based spaces for better perceptual uniformity. 
The HSL space is relative to the RGB space associated with the format.", diff --git a/extensions/HSV.c b/extensions/HSV.c index ad2e002..100636c 100644 --- a/extensions/HSV.c +++ b/extensions/HSV.c @@ -89,7 +89,7 @@ components (void) babl_component_new ("hue", NULL); babl_component_new ("saturation", NULL); babl_component_new ("value", NULL); - babl_component_new ("alpha", NULL); + babl_component_new ("alpha", "alpha", NULL); } static void diff --git a/extensions/ycbcr.c b/extensions/ycbcr.c index 1e779d7..9fe1fa0 100644 --- a/extensions/ycbcr.c +++ b/extensions/ycbcr.c @@ -49,7 +49,7 @@ init (void) static void components (void) { - babl_component_new ("alpha", NULL); + babl_component_new ("alpha", "alpha", NULL); } diff --git a/meson.build b/meson.build index 2ce13ec..9a292d9 100644 --- a/meson.build +++ b/meson.build @@ -1,6 +1,6 @@ project('babl', 'c', license: 'LGPL3+', - version: '0.1.108', + version: '0.1.109', meson_version: '>=0.55.0', default_options: [ 'buildtype=debugoptimized' @@ -110,11 +110,6 @@ platform_win32 = (host_os.startswith('mingw') or host_os.startswith('windows')) platform_osx = host_os.startswith('darwin') -if platform_osx - if cc.get_id() != 'clang' - error('You should use Clang/Clang++ on OSX.') - endif -endif platform_android = host_os.contains('android') @@ -161,6 +156,7 @@ can_run_host_binaries = meson.can_run_host_binaries() # Compiler arguments common_c_flags = [] +common_l_flags = [] if buildtype == 'debugoptimized' or buildtype == 'release' common_c_flags += cc.get_supported_arguments(['-Ofast']) @@ -180,6 +176,22 @@ extra_warnings_list = [ ] common_c_flags += cc.get_supported_arguments(extra_warnings_list) +# Generate .pdb (CodeView) debug symbols (makes it possible to debug with the DIA SDK) +#pdb_support = cc.has_argument('-gcodeview') and cc.has_link_argument('-Wl,--pdb=') +#if platform_win32 and pdb_support +# common_c_flags += '-gcodeview' +# common_l_flags += '-Wl,--pdb=' +#endif + +if platform_win32 and cc.get_id() == 'clang' + # 
Optimize DWARF symbols for Dr. Mingw + # https://github.com/jrfonseca/drmingw/issues/42 + common_c_flags += '-gdwarf-aranges' + # Workaround to get colored output + # https://github.com/msys2/MINGW-packages/issues/2988 + common_c_flags += '-fansi-escape-codes' +endif + add_project_arguments(common_c_flags, language: 'c') # Linker arguments @@ -200,9 +212,11 @@ elif host_cpu_family == 'arm' arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon-vfpv4']) arm_neon_flags += '-DARM_NEON' elif host_cpu_family == 'aarch64' - common_c_flags += cc.get_supported_arguments(['-mfpu=neon-fp-armv8']) + common_c_flags += cc.get_supported_arguments(['-mfpu=neon-fp-armv8', '-ftree-vectorize']) endif +add_project_link_arguments(common_l_flags, language: 'c') + ################################################################################ # Check for compiler CPU extensions @@ -497,6 +511,15 @@ todo_file = files('TODO') export_symbols_file = files('export-symbols') gen_babl_map_file = files('gen_babl_map.py') +################################################################################ +# Install debug data (.pdb) on Windows +# Ideally meson should take care of it automatically. +# See: https://github.com/mesonbuild/meson/issues/12977 +#if platform_win32 and pdb_support +# install_win_debug_script = find_program('./meson_install_win_debug.sh') +# meson.add_install_script(install_win_debug_script) +#endif + ################################################################################ # Subdirs diff --git a/meson_install_win_debug.sh b/meson_install_win_debug.sh new file mode 100644 index 0000000..5d09524 --- /dev/null +++ b/meson_install_win_debug.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +find . 
\( -iname '*.dll' -or -iname '*.exe' \) | \ +while IFS= read -r build_bin; +do + build_bin_name="${build_bin##*/}" + installed_bin=$(find ${MESON_INSTALL_DESTDIR_PREFIX} -iname "$build_bin_name") + if [ x"$installed_bin" != "x" ]; then + install_dir=$(dirname ${installed_bin}) + pdb_debug=$(echo $build_bin|sed 's/\.\(dll\|exe\)$/.pdb/') + if [ -f "$pdb_debug" ]; then + # Note: meson hides script outputs anyway on success. But this can be + # useful when debugging. + echo Installing $pdb_debug to $install_dir + if [ -z "$MESON_INSTALL_DRY_RUN" ]; then + cp -f $pdb_debug $install_dir + fi + fi + fi +done; diff --git a/tests/meson.build b/tests/meson.build index 7c67e70..fd09fb9 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -42,7 +42,7 @@ foreach test_name : test_names test_name + '.c', include_directories: [rootInclude, bablInclude], link_with: babl, - dependencies: [thread, lcms], + dependencies: [thread, lcms, log], export_dynamic: true, install: false, ) diff --git a/tools/meson.build b/tools/meson.build index 89ccf40..a2dc1cb 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -18,7 +18,7 @@ foreach tool_name : tool_names tool_name + '.c', include_directories: [rootInclude, bablInclude], link_with: babl, - dependencies: [math, thread, lcms], + dependencies: [math, thread, lcms, log], install: false, )