From e2b9c2942da619d7f188492676b806501202eea1 Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Wed, 3 Oct 2018 10:30:37 +0200 Subject: [PATCH] Squashed commit of the following: commit b6104e121006290f146177e1847ccd7826d2b6f6 Author: Niklas Haas Date: Sat Sep 29 18:18:49 2018 +0200 common: switch to explicit includes Since we no longer need the GCC pragma for symbol visibility, there's also no more reason to have common.h import everything under the sun - so only have modules import what they actually use. This speeds up compilation a tiny bit. commit 6835ae5d2ffb5e8a5257992e0a606d9209721e66 Author: Niklas Haas Date: Sat Sep 29 18:08:45 2018 +0200 meson: use a linker script for symbol visibility This prevents us from re-exporting e.g. glslang symbols, thereby properly fixing #42. commit 1484190ce490dbd277cd55b586e1952dd4d1d0d0 Author: Niklas Haas Date: Sat Sep 29 17:47:48 2018 +0200 meson: build C++ with -fvisibility=hidden as well commit de715031efe61d9399b8ad8468364e6798c5a11f Author: Niklas Haas Date: Sat Sep 29 02:59:29 2018 +0200 tests: add test cases for std140/std430 packing To avoid introducing regressions in the future, now that I'm sure it works as it's supposed to. commit e6e7be86e8d7585f8178544a2fc3c4eaca5b336e Author: Niklas Haas Date: Sat Sep 29 02:59:16 2018 +0200 gpu: add clarifying comment on pl_var_layout commit 6c5c652a7a2baeb1fb8be74ca8794ce510b86ac2 Author: Niklas Haas Date: Sat Sep 29 03:02:16 2018 +0200 vulkan: add VK_NVX_raytracing boilerplate commit e6a3f6f06fa62bd8eeb542ae82f34666f6b1c05e Author: Niklas Haas Date: Sat Sep 29 01:18:47 2018 +0200 dispatch: fix std140/std430 packing rules Actually, turns out our rules were wrong after all: vec3 only consumes 3 words even though it's aligned to 4, which we were not correctly accounting for. So in the struct { vec3; float; vec2 } the float and vec3 can be packed into the same vec4, whereas our code was assuming the vec3 consumed all four words (like its alignment). 
commit 792a21f9027af1d529d44b3a463c2b90ab38b451 Author: Niklas Haas Date: Sat Sep 29 00:40:02 2018 +0200 dispatch: use explicit offsets for push constants In the distant past this was a compile error, but it seems that has been fixed in glslang etc. Using explicit offsets is both safer and actually solves some very real driver bugs w.r.t. wrong std430 packing. commit 693b346e2a10d7e33104da8e00173c9224cc94d6 Author: Niklas Haas Date: Fri Sep 28 22:50:43 2018 +0200 shaders: fix compilation on clang Clang doesn't like using sqrt in a constexpr, so just hard-code the constant. Whatever. commit 6875a9c833f93bf803ca35a5115c2116e91e61d5 Author: Niklas Haas Date: Fri Sep 28 22:18:00 2018 +0200 shaders: improve the seeding Seeding by gl_FragCoord was very bad since the PRNG seed generation performs horribly for large inputs. (mpv avoided this problem since it was seeding with the [0,1)-scaled texture coordinates, but we don't have access to those here) Also, multiplying the temporal index by pi resulted in relatively cyclic patterns, since pi is well-approximated by 22/7. A simple way to salvage the situation without major code changes is to just use fract(phi * x) to turn an integer counter into a quasi-random floating point input. Since phi is the most irrational number, this does not generate any visually obvious patterns at realistic scales. commit 5278a77eacb35559b65aca656c060b42c1fe5926 Author: Niklas Haas Date: Fri Sep 28 18:18:15 2018 +0200 glslang: update for new glslang version This adds some new members to this struct. I still have zero clue why they don't provide this structure for us, so we don't have to copy/paste it from example code. commit 2345f60b9350c1ff5d64a682bbd0144cbf7f92f9 Author: Sebastian Ramacher Date: Fri Sep 28 17:43:52 2018 +0200 meson: require version 0.47 Type 'feature' for get_option was introduced in meson 0.47, so require it.
commit 17ded744c2459294fc900a4309645b90f0296fc8 Author: Niklas Haas Date: Fri Sep 28 03:27:18 2018 +0200 vulkan: treat empty device name like NULL Right now, an empty device name would reject all devices except one that's literally empty. This change makes that case behave more sanely. commit cc394dd0aa0f72ab530164150e70e8c4b3f56d3a Author: Niklas Haas Date: Fri Sep 28 00:23:38 2018 +0200 travis: avoid meson 0.48.0 This breaks the test suite cf. mesonbuild/meson#4248 --- .travis.yml | 2 +- meson.build | 1 + src/colorspace.c | 1 + src/common.c | 1 + src/common.h | 24 ---- src/context.h | 1 + src/dispatch.c | 2 +- src/dispatch.h | 1 + src/dither.c | 1 + src/filters.c | 2 + src/glsl/glslang.cc | 12 ++ src/gpu.c | 30 ++--- src/gpu.h | 2 + src/include/libplacebo/gpu.h | 2 + src/lcms.h | 2 + src/libplacebo.sym | 219 +++++++++++++++++++++++++++++++++++ src/meson.build | 10 +- src/renderer.c | 2 + src/shaders.c | 12 +- src/shaders.h | 2 + src/shaders/colorspace.c | 3 + src/shaders/sampling.c | 2 + src/spirv.h | 2 + src/swapchain.h | 1 + src/tests/bench.c | 5 + src/tests/colorspace.c | 1 + src/tests/dither.c | 1 + src/tests/filters.c | 1 + src/tests/gpu_tests.h | 6 + src/tests/tests.h | 1 + src/tests/utils.c | 62 ++++++++++ src/tests/vulkan.c | 1 + src/utils/upload.c | 2 + src/vulkan/common.h | 2 + src/vulkan/context.c | 2 +- src/vulkan/utils.c | 3 + 36 files changed, 378 insertions(+), 46 deletions(-) create mode 100644 src/libplacebo.sym diff --git a/.travis.yml b/.travis.yml index e0e3e17..2b0b09f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,4 +33,4 @@ install: - export PATH="`pwd`/bin:${PATH}" - pyenv global system 3.6 - wget https://github.com/ninja-build/ninja/releases/download/v1.7.2/ninja-linux.zip && unzip -q ninja-linux.zip -d bin - - pip3 install meson + - pip3 install 'meson != 0.48.0' diff --git a/meson.build b/meson.build index 720e988..b96d7ed 100644 --- a/meson.build +++ b/meson.build @@ -1,6 +1,7 @@ project('libplacebo', ['c', 'cpp'], license: 
'LGPL2.1+', default_options: ['c_std=c99'], + meson_version: '>=0.47', ) subdir('src') diff --git a/src/colorspace.c b/src/colorspace.c index ee97f74..20d28b5 100644 --- a/src/colorspace.c +++ b/src/colorspace.c @@ -18,6 +18,7 @@ #include #include "common.h" +#include bool pl_color_system_is_ycbcr_like(enum pl_color_system sys) { diff --git a/src/common.c b/src/common.c index f787494..13d0f4f 100644 --- a/src/common.c +++ b/src/common.c @@ -16,6 +16,7 @@ */ #include "common.h" +#include void pl_rect2d_normalize(struct pl_rect2d *rc) { diff --git a/src/common.h b/src/common.h index b336cac..d5439d2 100644 --- a/src/common.h +++ b/src/common.h @@ -28,30 +28,6 @@ #include "config.h" #include "pl_assert.h" -// Include all of the symbols that should be public in a way that marks them -// as being externally visible. (Otherwise, all symbols are hidden by default) -#pragma GCC visibility push(default) - -#include "include/libplacebo/colorspace.h" -#include "include/libplacebo/common.h" -#include "include/libplacebo/context.h" -#include "include/libplacebo/dispatch.h" -#include "include/libplacebo/dither.h" -#include "include/libplacebo/filters.h" -#include "include/libplacebo/gpu.h" -#include "include/libplacebo/renderer.h" -#include "include/libplacebo/shaders.h" -#include "include/libplacebo/shaders/colorspace.h" -#include "include/libplacebo/shaders/sampling.h" -#include "include/libplacebo/swapchain.h" -#include "include/libplacebo/utils/upload.h" - -#if PL_HAVE_VULKAN -#include "include/libplacebo/vulkan.h" -#endif - -#pragma GCC visibility pop - // Align up to the nearest multiple of an arbitrary alignment, which may also // be 0 to signal no alignment requirements. #define PL_ALIGN(x, align) ((align) ? 
((x) + (align) - 1) / (align) * (align) : (x)) diff --git a/src/context.h b/src/context.h index 939c5ce..9223d3d 100644 --- a/src/context.h +++ b/src/context.h @@ -21,6 +21,7 @@ #include "bstr/bstr.h" #include "common.h" +#include struct pl_context { struct pl_context_params params; diff --git a/src/dispatch.c b/src/dispatch.c index 323b526..f2beeaf 100644 --- a/src/dispatch.c +++ b/src/dispatch.c @@ -313,7 +313,7 @@ static void generate_shaders(struct pl_dispatch *dp, struct pass *pass, struct pass_var *pv = &pass->vars[i]; if (pv->type != PASS_VAR_PUSHC) continue; - ADD(glsl, "/*offset=%zu*/ ", pv->layout.offset); + ADD(glsl, " layout(offset=%zu) ", pv->layout.offset); add_var(dp, glsl, var); } ADD(glsl, "};\n"); diff --git a/src/dispatch.h b/src/dispatch.h index 1db70ad..c2fa0c3 100644 --- a/src/dispatch.h +++ b/src/dispatch.h @@ -18,6 +18,7 @@ #pragma once #include "common.h" +#include // Like `pl_dispatch_begin`, but has an extra `unique` parameter. If this is // true, the generated shader will be uniquely namespaced `unique` and may be diff --git a/src/dither.c b/src/dither.c index e2b54ae..4f739c9 100644 --- a/src/dither.c +++ b/src/dither.c @@ -30,6 +30,7 @@ #include #include "common.h" +#include void pl_generate_bayer_matrix(float *data, int size) { diff --git a/src/filters.c b/src/filters.c index 7f7aca4..bfaf7f2 100644 --- a/src/filters.c +++ b/src/filters.c @@ -39,6 +39,8 @@ #include "common.h" #include "context.h" +#include + bool pl_filter_function_eq(const struct pl_filter_function *a, const struct pl_filter_function *b) { diff --git a/src/glsl/glslang.cc b/src/glsl/glslang.cc index d0fd081..8b98bf6 100644 --- a/src/glsl/glslang.cc +++ b/src/glsl/glslang.cc @@ -192,6 +192,18 @@ const TBuiltInResource DefaultTBuiltInResource = { /* .MaxCullDistances = */ 8, /* .MaxCombinedClipAndCullDistances = */ 8, /* .MaxSamples = */ 4, +#if GLSLANG_PATCH_LEVEL >= 2892 + /* .maxMeshOutputVerticesNV = */ 256, + /* .maxMeshOutputPrimitivesNV = */ 512, + /* 
.maxMeshWorkGroupSizeX_NV = */ 32, + /* .maxMeshWorkGroupSizeY_NV = */ 1, + /* .maxMeshWorkGroupSizeZ_NV = */ 1, + /* .maxTaskWorkGroupSizeX_NV = */ 32, + /* .maxTaskWorkGroupSizeY_NV = */ 1, + /* .maxTaskWorkGroupSizeZ_NV = */ 1, + /* .maxMeshViewCountNV = */ 4, +#endif + /* .limits = */ { /* .nonInductiveForLoops = */ 1, /* .whileLoops = */ 1, diff --git a/src/gpu.c b/src/gpu.c index f34d6ab..9308c5c 100644 --- a/src/gpu.c +++ b/src/gpu.c @@ -1057,22 +1057,23 @@ struct pl_var_layout std140_layout(const struct pl_gpu *gpu, size_t offset, // std140 packing rules: // 1. The size of generic values is their size in bytes - // 2. The size of vectors is the vector length * the base count, with the - // exception of *vec3 which is always the same size as *vec4 + // 2. The size of vectors is the vector length * the base count // 3. Matrices are treated like arrays of column vectors // 4. The size of array rows is that of the element size rounded up to // the nearest multiple of vec4 - // 5. All values are aligned to a multiple of their size (stride for arrays) - size_t size = el_size * var->dim_v; + // 5. 
All values are aligned to a multiple of their size (stride for arrays), + // with the exception of vec3 which is aligned like vec4 + size_t stride = el_size * var->dim_v; + size_t align = stride; if (var->dim_v == 3) - size += el_size; + align += el_size; if (var->dim_m * var->dim_a > 1) - size = PL_ALIGN2(size, sizeof(float[4])); + stride = align = PL_ALIGN2(stride, sizeof(float[4])); return (struct pl_var_layout) { - .offset = PL_ALIGN2(offset, size), - .stride = size, - .size = size * var->dim_m * var->dim_a, + .offset = PL_ALIGN2(offset, align), + .stride = stride, + .size = stride * var->dim_m * var->dim_a, }; } @@ -1083,14 +1084,15 @@ struct pl_var_layout std430_layout(const struct pl_gpu *gpu, size_t offset, // std430 packing rules: like std140, except arrays/matrices are always // "tightly" packed, even arrays/matrices of vec3s - size_t size = el_size * var->dim_v; + size_t stride = el_size * var->dim_v; + size_t align = stride; if (var->dim_v == 3 && var->dim_m == 1 && var->dim_a == 1) - size += el_size; + align += el_size; return (struct pl_var_layout) { - .offset = PL_ALIGN2(offset, size), - .stride = size, - .size = size * var->dim_m * var->dim_a, + .offset = PL_ALIGN2(offset, align), + .stride = stride, + .size = stride * var->dim_m * var->dim_a, }; } diff --git a/src/gpu.h b/src/gpu.h index a45ed90..408d263 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -18,6 +18,8 @@ #pragma once #include "common.h" +#include +#include #define GPU_PFN(name) __typeof__(pl_##name) *name struct pl_gpu_fns { diff --git a/src/include/libplacebo/gpu.h b/src/include/libplacebo/gpu.h index a4377fa..21e13f8 100644 --- a/src/include/libplacebo/gpu.h +++ b/src/include/libplacebo/gpu.h @@ -507,6 +507,8 @@ struct pl_var pl_var_from_fmt(const struct pl_fmt *fmt, const char *name); // distinction between the columns of a matrix and the rows of an array. 
For // example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride // would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10. +// +// For non-array/matrix types, `stride` is equal to `size`. struct pl_var_layout { size_t offset; // the starting offset of the first byte diff --git a/src/lcms.h b/src/lcms.h index 2c727fc..c96aa64 100644 --- a/src/lcms.h +++ b/src/lcms.h @@ -20,6 +20,8 @@ #include "common.h" #include "bstr/bstr.h" +#include + // Compute a transformation from one color profile to another, and fill the // provided array by the resulting 3DLUT. The array must have room for four // components per sample. diff --git a/src/libplacebo.sym b/src/libplacebo.sym new file mode 100644 index 0000000..c5c3490 --- /dev/null +++ b/src/libplacebo.sym @@ -0,0 +1,219 @@ +{ + global: + pl_3dlut_apply; + pl_3dlut_default_params; + pl_3dlut_update; + pl_bit_encoding_equal; + pl_buf_create; + pl_buf_destroy; + pl_buf_poll; + pl_buf_read; + pl_buf_storage_layout; + pl_buf_uniform_layout; + pl_buf_write; + pl_chroma_location_offset; + pl_color_adjustment_neutral; + pl_color_light_is_scene_referred; + pl_color_map_default_params; + pl_color_primaries_guess; + pl_color_primaries_is_wide_gamut; + pl_color_repr_decode; + pl_color_repr_equal; + pl_color_repr_hdtv; + pl_color_repr_jpeg; + pl_color_repr_merge; + pl_color_repr_normalize; + pl_color_repr_rgb; + pl_color_repr_sdtv; + pl_color_repr_uhdtv; + pl_color_repr_unknown; + pl_color_space_bt2020_hlg; + pl_color_space_bt709; + pl_color_space_equal; + pl_color_space_hdr10; + pl_color_space_infer; + pl_color_space_merge; + pl_color_space_monitor; + pl_color_space_srgb; + pl_color_space_unknown; + pl_color_system_guess_ycbcr; + pl_color_system_is_linear; + pl_color_system_is_ycbcr_like; + pl_color_transfer_nominal_peak; + pl_context_create; + pl_context_default_params; + pl_context_destroy; + pl_deband_default_params; + pl_desc_access_glsl_name; + pl_desc_namespace; + pl_dispatch_abort; + 
pl_dispatch_begin; + pl_dispatch_compute; + pl_dispatch_create; + pl_dispatch_destroy; + pl_dispatch_finish; + pl_dither_default_params; + pl_filter_bicubic; + pl_filter_box; + pl_filter_catmull_rom; + pl_filter_config_eq; + pl_filter_ewa_ginseng; + pl_filter_ewa_hann; + pl_filter_ewa_jinc; + pl_filter_ewa_lanczos; + pl_filter_ewa_robidoux; + pl_filter_ewa_robidouxsharp; + pl_filter_free; + pl_filter_function_bcspline; + pl_filter_function_bicubic; + pl_filter_function_blackman; + pl_filter_function_box; + pl_filter_function_catmull_rom; + pl_filter_function_eq; + pl_filter_function_gaussian; + pl_filter_function_hamming; + pl_filter_function_hann; + pl_filter_function_jinc; + pl_filter_function_kaiser; + pl_filter_function_mitchell; + pl_filter_function_robidoux; + pl_filter_function_robidouxsharp; + pl_filter_function_sinc; + pl_filter_function_sphinx; + pl_filter_function_spline16; + pl_filter_function_spline36; + pl_filter_function_spline64; + pl_filter_function_triangle; + pl_filter_function_welch; + pl_filter_gaussian; + pl_filter_generate; + pl_filter_ginseng; + pl_filter_haasnsoft; + pl_filter_lanczos; + pl_filter_mitchell; + pl_filter_robidoux; + pl_filter_robidouxsharp; + pl_filter_sample; + pl_filter_sinc; + pl_filter_spline16; + pl_filter_spline36; + pl_filter_spline64; + pl_filter_triangle; + pl_find_fmt; + pl_find_named_filter; + pl_find_named_filter_function; + pl_find_named_fmt; + pl_find_vertex_fmt; + pl_fmt_is_ordered; + pl_generate_bayer_matrix; + pl_generate_blue_noise; + pl_get_color_mapping_matrix; + pl_get_cone_matrix; + pl_get_rgb2xyz_matrix; + pl_get_xyz2rgb_matrix; + pl_gpu_flush; + pl_icc_profile_equal; + pl_log_color; + pl_log_simple; + pl_matrix2x2_apply; + pl_matrix2x2_identity; + pl_matrix3x3_apply; + pl_matrix3x3_identity; + pl_matrix3x3_invert; + pl_matrix3x3_mul; + pl_matrix3x3_scale; + pl_named_filter_functions; + pl_named_filters; + pl_optimal_transfer_stride; + pl_pass_create; + pl_pass_destroy; + pl_pass_run; + 
pl_plane_data_from_mask; + pl_plane_find_fmt; + pl_push_constant_layout; + pl_raw_primaries_get; + pl_rect2d_normalize; + pl_rect3d_normalize; + pl_render_default_params; + pl_renderer_create; + pl_renderer_destroy; + pl_renderer_flush_cache; + pl_render_image; + pl_render_target_from_swapchain; + pl_shader_alloc; + pl_shader_color_map; + pl_shader_cone_distort; + pl_shader_deband; + pl_shader_decode_color; + pl_shader_delinearize; + pl_shader_dither; + pl_shader_encode_color; + pl_shader_finalize; + pl_shader_free; + pl_shader_is_compute; + pl_shader_linearize; + pl_shader_obj_destroy; + pl_shader_output_size; + pl_shader_reset; + pl_shader_sample_bicubic; + pl_shader_sample_direct; + pl_shader_sample_ortho; + pl_shader_sample_polar; + pl_shader_sigmoidize; + pl_shader_signature; + pl_shader_unsigmoidize; + pl_sigmoid_default_params; + pl_swapchain_destroy; + pl_swapchain_latency; + pl_swapchain_start_frame; + pl_swapchain_submit_frame; + pl_swapchain_swap_buffers; + pl_tex_blit; + pl_tex_clear; + pl_tex_create; + pl_tex_destroy; + pl_tex_download; + pl_tex_invalidate; + pl_tex_upload; + pl_transform2x2_apply; + pl_transform2x2_identity; + pl_transform3x3_apply; + pl_transform3x3_identity; + pl_transform3x3_invert; + pl_transform3x3_scale; + pl_upload_plane; + pl_var_float; + pl_var_from_fmt; + pl_var_glsl_type_name; + pl_var_host_layout; + pl_var_mat2; + pl_var_mat3; + pl_var_mat4; + pl_var_type_size; + pl_var_uint; + pl_var_vec2; + pl_var_vec3; + pl_var_vec4; + pl_vision_achromatopsia; + pl_vision_deuteranomaly; + pl_vision_deuteranopia; + pl_vision_monochromacy; + pl_vision_normal; + pl_vision_protanomaly; + pl_vision_protanopia; + pl_vision_tritanomaly; + pl_vision_tritanopia; + pl_vk_inst_create; + pl_vk_inst_default_params; + pl_vk_inst_destroy; + pl_vulkan_create; + pl_vulkan_create_swapchain; + pl_vulkan_default_params; + pl_vulkan_destroy; + pl_vulkan_hold; + pl_vulkan_release; + pl_vulkan_wrap; + + local: + *; +}; diff --git a/src/meson.build 
b/src/meson.build index b920abf..f2d7577 100644 --- a/src/meson.build +++ b/src/meson.build @@ -6,8 +6,7 @@ version = majorver + '.' + apiver + '.' + fixver c_opts = [ '-D_ISOC99_SOURCE', '-D_GNU_SOURCE', '-D_XOPEN_SOURCE=700', - '-U__STRICT_ANSI__', '-fvisibility=hidden', - '-Wmissing-prototypes', '-Wno-pointer-sign' + '-U__STRICT_ANSI__', '-Wmissing-prototypes', '-Wno-pointer-sign' ] # glslang needs c++11 @@ -199,15 +198,21 @@ configure_file( configuration: conf, ) +# Symbol visibility +symfile = 'libplacebo.sym' +vflag = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), symfile) + # Build process add_project_arguments(build_opts + c_opts, language: 'c') add_project_arguments(build_opts + cpp_opts, language: 'cpp') inc = include_directories('./include') + lib = library('placebo', sources, install: true, dependencies: build_deps, soversion: apiver, include_directories: inc, + link_args: vflag, ) # Install process @@ -228,6 +233,7 @@ if get_option('tests') foreach t : tests e = executable('test.' 
+ t, 'tests/' + t, dependencies: build_deps + tdeps, + objects: lib.extract_all_objects(), include_directories: inc ) diff --git a/src/renderer.c b/src/renderer.c index 753f3b9..9400d77 100644 --- a/src/renderer.c +++ b/src/renderer.c @@ -21,6 +21,8 @@ #include "shaders.h" #include "dispatch.h" +#include + enum { // The scalers for each plane are set up to be just the index itself SCALER_PLANE0 = 0, diff --git a/src/shaders.c b/src/shaders.c index 767b649..a21efc5 100644 --- a/src/shaders.c +++ b/src/shaders.c @@ -490,9 +490,13 @@ ident_t sh_prng(struct pl_shader *sh, bool temporal, ident_t *p_state) " return fract(state * 1.0/41.0); \n" "}\n", permute, randfun, permute); + // Phi is the most irrational number, so it's a good candidate for + // generating seed values to the PRNG + static const double phi = 1.618033988749895; + const char *seed = "0.0"; if (temporal) { - float seedval = modff(M_PI * sh->index, &(float){0}); + float seedval = modff(phi * sh->index, &(float){0}); seed = sh_var(sh, (struct pl_shader_var) { .var = pl_var_float("seed"), .data = &seedval, @@ -501,8 +505,10 @@ ident_t sh_prng(struct pl_shader *sh, bool temporal, ident_t *p_state) } ident_t state = sh_fresh(sh, "prng"); - GLSL("vec3 %s_m = vec3(gl_FragCoord.xy, %s) + vec3(1.0); \n" - "float %s = %s(%s(%s(%s_m.x) + %s_m.y) + %s_m.z); \n", + GLSL("vec2 init = fract(gl_FragCoord.xy * vec2(%f)); \n" + "vec3 %s_m = vec3(init, %s) + vec3(1.0); \n" + "float %s = %s(%s(%s(%s_m.x) + %s_m.y) + %s_m.z); \n", + phi, state, seed, state, permute, permute, permute, state, state, state); diff --git a/src/shaders.h b/src/shaders.h index 6b355c5..9c82831 100644 --- a/src/shaders.h +++ b/src/shaders.h @@ -24,6 +24,8 @@ #include "context.h" #include "gpu.h" +#include + // This represents an identifier (e.g. name of function, uniform etc.) for // a shader resource. The generated identifiers are immutable, but only live // until pl_shader_reset - so make copies when passing to external stuff. 
diff --git a/src/shaders/colorspace.c b/src/shaders/colorspace.c index bb74bed..cd2eb52 100644 --- a/src/shaders/colorspace.c +++ b/src/shaders/colorspace.c @@ -18,6 +18,9 @@ #include #include "shaders.h" +#include +#include + void pl_shader_decode_color(struct pl_shader *sh, struct pl_color_repr *repr, const struct pl_color_adjustment *params) { diff --git a/src/shaders/sampling.c b/src/shaders/sampling.c index e68fa21..6f3cd81 100644 --- a/src/shaders/sampling.c +++ b/src/shaders/sampling.c @@ -18,6 +18,8 @@ #include #include "shaders.h" +#include + const struct pl_deband_params pl_deband_default_params = { .iterations = 1, .threshold = 4.0, diff --git a/src/spirv.h b/src/spirv.h index d1ecc39..b96e39b 100644 --- a/src/spirv.h +++ b/src/spirv.h @@ -20,6 +20,8 @@ #include "common.h" #include "context.h" +#include + enum glsl_shader_stage { GLSL_SHADER_VERTEX, GLSL_SHADER_FRAGMENT, diff --git a/src/swapchain.h b/src/swapchain.h index d71e909..82a4edc 100644 --- a/src/swapchain.h +++ b/src/swapchain.h @@ -18,6 +18,7 @@ #pragma once #include "common.h" +#include #define SW_PFN(name) __typeof__(pl_swapchain_##name) *name struct pl_sw_fns { diff --git a/src/tests/bench.c b/src/tests/bench.c index 4e7008a..523e719 100644 --- a/src/tests/bench.c +++ b/src/tests/bench.c @@ -1,6 +1,11 @@ #include "tests.h" #include +#include +#include +#include +#include + #define TEX_SIZE 2048 #define CUBE_SIZE 64 #define NUM_FBOS 10 diff --git a/src/tests/colorspace.c b/src/tests/colorspace.c index 670df3a..f52f917 100644 --- a/src/tests/colorspace.c +++ b/src/tests/colorspace.c @@ -1,4 +1,5 @@ #include "tests.h" +#include int main() { diff --git a/src/tests/dither.c b/src/tests/dither.c index d827746..8865beb 100644 --- a/src/tests/dither.c +++ b/src/tests/dither.c @@ -1,4 +1,5 @@ #include "tests.h" +#include #define SHIFT 4 #define SIZE (1 << SHIFT) diff --git a/src/tests/filters.c b/src/tests/filters.c index 57d1c4b..e0a221e 100644 --- a/src/tests/filters.c +++ b/src/tests/filters.c 
@@ -1,4 +1,5 @@ #include "tests.h" +#include int main() { diff --git a/src/tests/gpu_tests.h b/src/tests/gpu_tests.h index b69c693..1d5d22f 100644 --- a/src/tests/gpu_tests.h +++ b/src/tests/gpu_tests.h @@ -1,6 +1,12 @@ #include "tests.h" #include "shaders.h" +#include +#include +#include +#include +#include + static void pl_test_roundtrip(const struct pl_gpu *gpu, const struct pl_tex *tex, float *src, float *dst) { diff --git a/src/tests/tests.h b/src/tests/tests.h index 9b27e4c..2653e26 100644 --- a/src/tests/tests.h +++ b/src/tests/tests.h @@ -18,6 +18,7 @@ #pragma once #include "common.h" +#include #include #include diff --git a/src/tests/utils.c b/src/tests/utils.c index bf62f60..e7e1029 100644 --- a/src/tests/utils.c +++ b/src/tests/utils.c @@ -1,4 +1,7 @@ #include "tests.h" +#include "gpu.h" + +#include int main() { @@ -25,4 +28,63 @@ int main() REQUIRE(data.component_size[i] == 10); REQUIRE(data.component_map[i] == i); } + + // Test GLSL structure packing + struct pl_var vec1 = pl_var_float(""), + vec2 = pl_var_vec2(""), + vec3 = pl_var_vec3(""), + mat2 = pl_var_mat2(""), + mat3 = pl_var_mat3(""); + + struct pl_var_layout layout; + layout = std140_layout(NULL, 0, &vec2); + REQUIRE(layout.offset == 0); + REQUIRE(layout.stride == 2 * sizeof(float)); + REQUIRE(layout.size == 2 * sizeof(float)); + + layout = std140_layout(NULL, 3 * sizeof(float), &vec3); + REQUIRE(layout.offset == 4 * sizeof(float)); + REQUIRE(layout.stride == 3 * sizeof(float)); + REQUIRE(layout.size == 3 * sizeof(float)); + + layout = std140_layout(NULL, 2 * sizeof(float), &mat3); + REQUIRE(layout.offset == 4 * sizeof(float)); + REQUIRE(layout.stride == 4 * sizeof(float)); + REQUIRE(layout.size == 3 * 4 * sizeof(float)); + + layout = std430_layout(NULL, 2 * sizeof(float), &mat3); + REQUIRE(layout.offset == 4 * sizeof(float)); + REQUIRE(layout.stride == 3 * sizeof(float)); + REQUIRE(layout.size == 3 * 3 * sizeof(float)); + + layout = std140_layout(NULL, 3 * sizeof(float), &vec1); + 
REQUIRE(layout.offset == 3 * sizeof(float)); + REQUIRE(layout.stride == sizeof(float)); + REQUIRE(layout.size == sizeof(float)); + + struct pl_var vec2a = vec2; + vec2a.dim_a = 50; + + layout = std140_layout(NULL, sizeof(float), &vec2a); + REQUIRE(layout.offset == 4 * sizeof(float)); + REQUIRE(layout.stride == 4 * sizeof(float)); + REQUIRE(layout.size == 50 * 4 * sizeof(float)); + + layout = std430_layout(NULL, sizeof(float), &vec2a); + REQUIRE(layout.offset == 2 * sizeof(float)); + REQUIRE(layout.stride == 2 * sizeof(float)); + REQUIRE(layout.size == 50 * 2 * sizeof(float)); + + struct pl_var mat2a = mat2; + mat2a.dim_a = 20; + + layout = std140_layout(NULL, 5 * sizeof(float), &mat2a); + REQUIRE(layout.offset == 8 * sizeof(float)); + REQUIRE(layout.stride == 4 * sizeof(float)); + REQUIRE(layout.size == 20 * 2 * 4 * sizeof(float)); + + layout = std430_layout(NULL, 5 * sizeof(float), &mat2a); + REQUIRE(layout.offset == 6 * sizeof(float)); + REQUIRE(layout.stride == 2 * sizeof(float)); + REQUIRE(layout.size == 20 * 2 * 2 * sizeof(float)); } diff --git a/src/tests/vulkan.c b/src/tests/vulkan.c index b928afe..c29ed3c 100644 --- a/src/tests/vulkan.c +++ b/src/tests/vulkan.c @@ -1,4 +1,5 @@ #include "gpu_tests.h" +#include int main() { diff --git a/src/utils/upload.c b/src/utils/upload.c index e25a2fd..43867d4 100644 --- a/src/utils/upload.c +++ b/src/utils/upload.c @@ -21,6 +21,8 @@ #include "common.h" #include "gpu.h" +#include + struct comp { int order; // e.g. 0, 1, 2, 3 for RGBA int size; // size in bits diff --git a/src/vulkan/common.h b/src/vulkan/common.h index 04d6e27..a7bd4b5 100644 --- a/src/vulkan/common.h +++ b/src/vulkan/common.h @@ -20,6 +20,8 @@ #include "../common.h" #include "../context.h" +#include + // Vulkan allows the optional use of a custom allocator. We don't need one but // mark this parameter with a better name in case we ever decide to change this // in the future. 
(And to make the code more readable) diff --git a/src/vulkan/context.c b/src/vulkan/context.c index 41bb1c7..3d4756c 100644 --- a/src/vulkan/context.c +++ b/src/vulkan/context.c @@ -287,7 +287,7 @@ static bool find_physical_device(struct vk_ctx *vk, continue; } - if (params->device_name) { + if (params->device_name && params->device_name[0] != '\0') { if (strcmp(params->device_name, props.deviceName) == 0) { vk->physd = devices[i]; best = 10; // high number... diff --git a/src/vulkan/utils.c b/src/vulkan/utils.c index c663a5c..7c138fe 100644 --- a/src/vulkan/utils.c +++ b/src/vulkan/utils.c @@ -145,6 +145,9 @@ const char *vk_obj_str(VkDebugReportObjectTypeEXT obj) CASE(OBJECT_TABLE_NVX, VkObjectTableNVX); CASE(INDIRECT_COMMANDS_LAYOUT_NVX, VkIndirectCommandsLayoutNVX); #endif +#ifdef VK_NVX_raytracing + CASE(ACCELERATION_STRUCTURE_NVX, VkAccelerationStructureNVX); +#endif // Included to satisfy the switch coverage check case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT: -- 2.17.1