--- a/configure.ac
+++ b/configure.ac
@@ -3420,6 +3420,9 @@
 AC_ARG_ENABLE(mmal,
   AS_HELP_STRING([--enable-mmal],
     [Multi-Media Abstraction Layer (MMAL) hardware plugin (default enable)]))
+AC_ARG_ENABLE(mmal_avcodec,
+  AS_HELP_STRING([--enable-mmal-avcodec],
+    [Use MMAL enabled avcodec libs (default disable)]))
 if test "${enable_mmal}" != "no"; then
   VLC_SAVE_FLAGS
   LDFLAGS="${LDFLAGS} -L/opt/vc/lib -lvchostif"
@@ -3430,7 +3433,7 @@
         VLC_ADD_PLUGIN([mmal])
         VLC_ADD_LDFLAGS([mmal],[ -L/opt/vc/lib ])
         VLC_ADD_CFLAGS([mmal],[ -isystem /opt/vc/include -isystem /opt/vc/include/interface/vcos/pthreads -isystem /opt/vc/include/interface/vmcs_host/linux ])
-        VLC_ADD_LIBS([mmal],[ -lbcm_host -lmmal -lmmal_core -lmmal_components -lmmal_util -lvchostif ]) ], [
+        VLC_ADD_LIBS([mmal],[ -lbcm_host -lmmal -lmmal_core -lmmal_components -lmmal_util -lvchostif -lvchiq_arm -lvcsm ]) ], [
           AS_IF([test "${enable_mmal}" = "yes"],
             [ AC_MSG_ERROR([Cannot find bcm library...]) ],
             [ AC_MSG_WARN([Cannot find bcm library...]) ])
@@ -3442,6 +3445,7 @@
   VLC_RESTORE_FLAGS
 fi
 AM_CONDITIONAL([HAVE_MMAL], [test "${have_mmal}" = "yes"])
+AM_CONDITIONAL([HAVE_MMAL_AVCODEC], [test "${enable_mmal_avcodec}" = "yes"])
 
 dnl
 dnl evas plugin
--- a/include/vlc_fourcc.h
+++ b/include/vlc_fourcc.h
@@ -365,6 +365,11 @@
 
 /* Broadcom MMAL opaque buffer type */
 #define VLC_CODEC_MMAL_OPAQUE     VLC_FOURCC('M','M','A','L')
+#define VLC_CODEC_MMAL_ZC_SAND8   VLC_FOURCC('Z','S','D','8')
+#define VLC_CODEC_MMAL_ZC_SAND10  VLC_FOURCC('Z','S','D','0')
+#define VLC_CODEC_MMAL_ZC_SAND30  VLC_FOURCC('Z','S','D','3')
+#define VLC_CODEC_MMAL_ZC_I420    VLC_FOURCC('Z','4','2','0')
+#define VLC_CODEC_MMAL_ZC_RGB32   VLC_FOURCC('Z','R','G','B')
 
 /* DXVA2 opaque video surface for use with D3D9 */
 #define VLC_CODEC_D3D9_OPAQUE     VLC_FOURCC('D','X','A','9') /* 4:2:0  8 bpc */
--- a/modules/hw/mmal/Makefile.am
+++ b/modules/hw/mmal/Makefile.am
@@ -1,23 +1,46 @@
 include $(top_srcdir)/modules/common.am
 mmaldir = $(pluginsdir)/mmal
 
-AM_CFLAGS += $(CFLAGS_mmal)
-AM_LDFLAGS += -rpath '$(mmaldir)' $(LDFLAGS_mmal)
+AM_CFLAGS += -pthread $(CFLAGS_mmal)
+AM_LDFLAGS += -pthread -rpath '$(mmaldir)' $(LDFLAGS_mmal)
 
-libmmal_vout_plugin_la_SOURCES = vout.c mmal_picture.c mmal_picture.h
+libmmal_vout_plugin_la_SOURCES = vout.c subpic.c mmal_picture.c subpic.h  mmal_cma.c mmal_picture.h mmal_piccpy_neon.S
 libmmal_vout_plugin_la_CFLAGS = $(AM_CFLAGS)
 libmmal_vout_plugin_la_LDFLAGS = $(AM_LDFLAGS) -lm
 libmmal_vout_plugin_la_LIBADD = $(LIBS_mmal)
 mmal_LTLIBRARIES = libmmal_vout_plugin.la
 
-libmmal_codec_plugin_la_SOURCES = codec.c
+libmmal_codec_plugin_la_SOURCES = codec.c subpic.c mmal_picture.c blend_rgba_neon.S subpic.h mmal_picture.h mmal_piccpy_neon.S\
+  mmal_cma.c mmal_cma.h
 libmmal_codec_plugin_la_CFLAGS = $(AM_CFLAGS)
 libmmal_codec_plugin_la_LDFLAGS = $(AM_LDFLAGS)
 libmmal_codec_plugin_la_LIBADD = $(LIBS_mmal)
 mmal_LTLIBRARIES += libmmal_codec_plugin.la
 
-libmmal_deinterlace_plugin_la_SOURCES = deinterlace.c mmal_picture.c
+libmmal_deinterlace_plugin_la_SOURCES = deinterlace.c mmal_picture.c  mmal_cma.c mmal_picture.h mmal_piccpy_neon.S
 libmmal_deinterlace_plugin_la_CFLAGS = $(AM_CFLAGS)
 libmmal_deinterlace_plugin_la_LDFLAGS = $(AM_LDFLAGS)
 libmmal_deinterlace_plugin_la_LIBADD = $(LIBS_mmal)
 mmal_LTLIBRARIES += libmmal_deinterlace_plugin.la
+
+libmmal_xsplitter_plugin_la_SOURCES = xsplitter.c mmal_picture.c  mmal_cma.c mmal_picture.h mmal_piccpy_neon.S
+libmmal_xsplitter_plugin_la_CFLAGS = $(AM_CFLAGS)
+libmmal_xsplitter_plugin_la_LDFLAGS = $(AM_LDFLAGS)
+libmmal_xsplitter_plugin_la_LIBADD = $(LIBS_mmal)
+mmal_LTLIBRARIES += libmmal_xsplitter_plugin.la
+
+libmmal_converter_plugin_la_SOURCES = converter_mmal.c mmal_cma.c mmal_picture.c mmal_cma.h mmal_picture.h mmal_piccpy_neon.S
+libmmal_converter_plugin_la_CFLAGS = $(AM_CFLAGS)
+libmmal_converter_plugin_la_LDFLAGS = $(AM_LDFLAGS)
+libmmal_converter_plugin_la_LIBADD = $(LIBS_mmal)
+mmal_LTLIBRARIES += libmmal_converter_plugin.la
+
+if HAVE_MMAL_AVCODEC
+libmmal_avcodec_plugin_la_SOURCES = mmal_avcodec.c mmal_cma.c mmal_picture.c mmal_picture.h  mmal_cma.h mmal_piccpy_neon.S
+libmmal_avcodec_plugin_la_CFLAGS = $(AM_CFLAGS)
+libmmal_avcodec_plugin_la_LDFLAGS = $(AM_LDFLAGS)
+libmmal_avcodec_plugin_la_LIBADD = $(AVFORMAT_LIBS) $(AVUTIL_LIBS) $(LIBS_mmal)
+mmal_LTLIBRARIES += libmmal_avcodec_plugin.la
+endif
+
+
--- /dev/null
+++ b/modules/hw/mmal/blend_rgba_neon.S
@@ -0,0 +1,197 @@
+        .syntax unified
+        .arm
+//      .thumb
+        .text
+        .align 16
+        .arch armv7-a
+        .fpu neon-vfpv4
+
+@ blend_rgbx_rgba_neon
+
+@ Implements /255 as ((x * 257) + 0x8000) >> 16
+@ This generates something in the range [(x+126)/255, (x+127)/255] which is good enough
+
+@ There is advantage to aligning src and/or dest - dest gives a bit more due to being used twice
+
+
+
+@ [r0] RGBx dest      loaded into d20-d23
+@ [r1] RGBA src merge loaded into d16-d19
+@ r2   plane alpha
+@ r3   count (pixels)
+
+.macro blend_main sR, sG, sB, sA, dR, dG, dB, dA
+
+        push      { r4, lr }
+
+        vdup.u8    d7,  r2
+
+        subs       r3,  #8
+        vmov.u8    d6,  #0xff
+
+        blt        2f
+
+        @ If < 16 bytes to move then don't bother trying to align
+        @ (a) This means the the align doesn't need to worry about r3 underflow
+        @ (b) The overhead would be greater than any gain
+        cmp        r3,  #8
+        mov        r4,  r3
+        ble        1f
+
+        @ Align r1 on a 32 byte boundary
+        neg        r3,  r0
+        ubfx       r3,  r3,  #2,  #3
+
+        cmp        r3,  #0
+        blne       10f
+
+        sub        r3,  r4,  r3
+
+1:
+        vld4.8    {d16, d17, d18, d19}, [r1]
+
+1:
+        vmull.u8   q15, \sA, d7
+
+        vld4.8    {d20, d21, d22, d23}, [r0]
+
+        vsra.u16   q15, q15, #8
+        subs       r3,  #8
+        vrshrn.u16 d31, q15, #8
+        vsub.u8    d30, d6,  d31
+
+        vmull.u8   q12, \sR, d31
+        vmull.u8   q13, \sG, d31
+        vmull.u8   q14, \sB, d31
+        addge      r1,  #32
+
+        vmlal.u8   q12, \dR, d30
+        vmlal.u8   q13, \dG, d30
+        vmlal.u8   q14, \dB, d30
+        vld4.8    {d16, d17, d18, d19}, [r1]
+
+        vsra.u16   q12, q12, #8         @ * 257/256
+        vsra.u16   q13, q13, #8
+        vsra.u16   q14, q14, #8
+
+        vrshrn.u16 \dR, q12, #8
+        vrshrn.u16 \dG, q13, #8
+        vrshrn.u16 \dB, q14, #8
+        vmov.u8    \dA, #0xff
+
+        vst4.8    {d20, d21, d22, d23}, [r0]!
+        bge        1b
+        add        r1,  #32
+
+2:
+        cmp        r3,  #-8
+        blgt       10f
+
+        pop       { r4, pc }
+
+
+// Partial version
+// Align @ start & deal with tail
+10:
+        lsls       r2,  r3,  #30        @ b2 -> C, b1 -> N
+        mov        r2,  r0
+        bcc        1f
+        vld4.8    {d16[0], d17[0], d18[0], d19[0]}, [r1]!
+        vld4.8    {d20[0], d21[0], d22[0], d23[0]}, [r2]!
+        vld4.8    {d16[1], d17[1], d18[1], d19[1]}, [r1]!
+        vld4.8    {d20[1], d21[1], d22[1], d23[1]}, [r2]!
+        vld4.8    {d16[2], d17[2], d18[2], d19[2]}, [r1]!
+        vld4.8    {d20[2], d21[2], d22[2], d23[2]}, [r2]!
+        vld4.8    {d16[3], d17[3], d18[3], d19[3]}, [r1]!
+        vld4.8    {d20[3], d21[3], d22[3], d23[3]}, [r2]!
+1:
+        bpl        1f
+        vld4.8    {d16[4], d17[4], d18[4], d19[4]}, [r1]!
+        vld4.8    {d20[4], d21[4], d22[4], d23[4]}, [r2]!
+        vld4.8    {d16[5], d17[5], d18[5], d19[5]}, [r1]!
+        vld4.8    {d20[5], d21[5], d22[5], d23[5]}, [r2]!
+1:
+        tst        r3,  #1
+        beq        1f
+        vld4.8    {d16[6], d17[6], d18[6], d19[6]}, [r1]!
+        vld4.8    {d20[6], d21[6], d22[6], d23[6]}, [r2]!
+1:
+        @ Set conditions for later
+        lsls       r2,  r3,  #30        @ b2 -> C, b1 -> N
+
+        vmull.u8   q15, \sA, d7
+        vsra.u16   q15, q15, #8
+        vrshrn.u16 d31, q15, #8
+        vsub.u8    d30, d6,  d31
+
+        vmull.u8   q12, \sR, d31
+        vmull.u8   q13, \sG, d31
+        vmull.u8   q14, \sB, d31
+
+        vmlal.u8   q12, \dR, d30
+        vmlal.u8   q13, \dG, d30
+        vmlal.u8   q14, \dB, d30
+
+        vsra.u16   q12, q12, #8
+        vsra.u16   q13, q13, #8
+        vsra.u16   q14, q14, #8
+
+        vrshrn.u16 \dR, q12, #8
+        vrshrn.u16 \dG, q13, #8
+        vrshrn.u16 \dB, q14, #8
+        vmov.u8    \dA, #0xff
+
+        bcc        1f
+        vst4.8    {d20[0], d21[0], d22[0], d23[0]}, [r0]!
+        vst4.8    {d20[1], d21[1], d22[1], d23[1]}, [r0]!
+        vst4.8    {d20[2], d21[2], d22[2], d23[2]}, [r0]!
+        vst4.8    {d20[3], d21[3], d22[3], d23[3]}, [r0]!
+1:
+        bpl        1f
+        vst4.8    {d20[4], d21[4], d22[4], d23[4]}, [r0]!
+        vst4.8    {d20[5], d21[5], d22[5], d23[5]}, [r0]!
+1:
+        tst        r3,  #1
+        bxeq       lr
+        vst4.8    {d20[6], d21[6], d22[6], d23[6]}, [r0]!
+
+        bx         lr
+
+.endm
+
+
+@ [r0] RGBx dest      (Byte order: R, G, B, x)
+@ [r1] RGBA src merge (Byte order: R, G, B, A)
+@ r2   plane alpha
+@ r3   count (pixels)
+
+@ Whilst specified as RGBx+RGBA the only important part is the position of
+@ alpha, the other components are all treated the same
+
+@ [r0] RGBx dest      (Byte order: R, G, B, x)
+@ [r1] RGBA src merge (Byte order: R, G, B, A) - same as above
+@ r2   plane alpha
+@ r3   count (pixels)
+        .align  16
+        .global blend_rgbx_rgba_neon
+#ifdef __ELF__
+        .type   blend_rgbx_rgba_neon, %function
+#endif
+blend_rgbx_rgba_neon:
+        blend_main d16, d17, d18, d19, d20, d21, d22, d23
+
+
+@ [r0] RGBx dest      (Byte order: R, G, B, x)
+@ [r1] RGBA src merge (Byte order: B, G, R, A) - B / R swapped
+@ r2   plane alpha
+@ r3   count (pixels)
+        .align  16
+        .global blend_bgrx_rgba_neon
+#ifdef __ELF__
+        .type   blend_bgrx_rgba_neon, %function
+#endif
+blend_bgrx_rgba_neon:
+        blend_main d18, d17, d16, d19, d20, d21, d22, d23
+
+
+
--- /dev/null
+++ b/modules/hw/mmal/blend_rgba_neon.h
@@ -0,0 +1,17 @@
+#ifndef HW_MMAL_BLEND_RGBA_NEON_H
+#define HW_MMAL_BLEND_RGBA_NEON_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void blend_neon_fn(void * dest, const void * src, int alpha, unsigned int n);
+extern blend_neon_fn blend_rgbx_rgba_neon;
+extern blend_neon_fn blend_bgrx_rgba_neon;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
--- /dev/null
+++ b/modules/hw/mmal/blend_test.c
@@ -0,0 +1,180 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <memory.h>
+
+#include "blend_rgba_neon.h"
+
+#define RPI_PROFILE 1
+#define RPI_PROC_ALLOC 1
+#include "rpi_prof.h"
+
+static inline unsigned div255(unsigned v)
+{
+    // This models what we we do in the asm for / 255
+    // It generates something in the range [(i+126)/255, (i+127)/255] which is good enough
+    return ((v * 257) + 0x8000) >> 16;
+}
+
+static inline unsigned int a_merge(unsigned int dst, unsigned src, unsigned f)
+{
+    return div255((255 - f) * (dst) + src * f);
+}
+
+
+static void merge_line(void * dest, const void * src, int alpha, unsigned int n)
+{
+    unsigned int i;
+    const uint8_t * s_data = src;
+    uint8_t * d_data = dest;
+
+    for (i = 0; i != n; ++i) {
+        const uint32_t s_pel = ((const uint32_t *)s_data)[i];
+        const uint32_t d_pel = ((const uint32_t *)d_data)[i];
+        const unsigned int a = div255(alpha * (s_pel >> 24));
+        ((uint32_t *)d_data)[i] = 0xff000000 |
+            (a_merge((d_pel >> 16) & 0xff, (s_pel >> 16) & 0xff, a) << 16) |
+            (a_merge((d_pel >> 8)  & 0xff, (s_pel >> 8)  & 0xff, a) << 8 ) |
+            (a_merge((d_pel >> 0)  & 0xff, (s_pel >> 0)  & 0xff, a) << 0 );
+    }
+}
+
+
+// Merge RGBA with BGRA
+static void merge_line2(void * dest, const void * src, int alpha, unsigned int n)
+{
+    unsigned int i;
+    const uint8_t * s_data = src;
+    uint8_t * d_data = dest;
+
+    for (i = 0; i != n; ++i) {
+        const uint32_t s_pel = ((const uint32_t *)s_data)[i];
+        const uint32_t d_pel = ((const uint32_t *)d_data)[i];
+        const unsigned int a = div255(alpha * (s_pel >> 24));
+        ((uint32_t *)d_data)[i] = 0xff000000 |
+            (a_merge((d_pel >> 0)  & 0xff, (s_pel >> 16) & 0xff, a) << 0 ) |
+            (a_merge((d_pel >> 8)  & 0xff, (s_pel >> 8)  & 0xff, a) << 8 ) |
+            (a_merge((d_pel >> 16) & 0xff, (s_pel >> 0)  & 0xff, a) << 16);
+    }
+}
+
+#define BUF_SIZE   256
+#define BUF_SLACK  16
+#define BUF_ALIGN  64
+#define BUF_ALLOC  (BUF_SIZE + 2*BUF_SLACK + BUF_ALIGN)
+
+static void test_line(const uint32_t * const dx, const unsigned int d_off,
+                      const uint32_t * const sx, const unsigned int s_off,
+                      const unsigned int alpha, const unsigned int len, const int prof_no)
+{
+    uint32_t d0_buf[BUF_ALLOC];
+    uint32_t d1_buf[BUF_ALLOC];
+    const uint32_t * const s0 = sx + s_off;
+
+    uint32_t * const d0 =  (uint32_t *)(((uintptr_t)d0_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
+    uint32_t * const d1 = (uint32_t *)(((uintptr_t)d1_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
+    unsigned int i;
+
+    memcpy(d0, dx, (BUF_SIZE + BUF_SLACK*2)*4);
+    memcpy(d1, dx, (BUF_SIZE + BUF_SLACK*2)*4);
+
+    merge_line(d0 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
+
+    PROFILE_START();
+    blend_rgbx_rgba_neon(d1 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
+    PROFILE_ACC_N(prof_no);
+
+    for (i = 0; i != BUF_SIZE + BUF_SLACK*2; ++i) {
+        if (d0[i] != d1[i]) {
+            printf("%3d: %08x + %08x * %02x: %08x / %08x: len=%d\n", (int)(i - BUF_SLACK), dx[i], s0[i], alpha, d0[i], d1[i], len);
+        }
+    }
+}
+
+static void test_line2(const uint32_t * const dx, const unsigned int d_off,
+                      const uint32_t * const sx, const unsigned int s_off,
+                      const unsigned int alpha, const unsigned int len, const int prof_no)
+{
+    uint32_t d0_buf[BUF_ALLOC];
+    uint32_t d1_buf[BUF_ALLOC];
+    const uint32_t * const s0 = sx + s_off;
+
+    uint32_t * const d0 =  (uint32_t *)(((uintptr_t)d0_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
+    uint32_t * const d1 = (uint32_t *)(((uintptr_t)d1_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
+    unsigned int i;
+
+    memcpy(d0, dx, (BUF_SIZE + BUF_SLACK*2)*4);
+    memcpy(d1, dx, (BUF_SIZE + BUF_SLACK*2)*4);
+
+    merge_line2(d0 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
+
+    PROFILE_START();
+    blend_bgrx_rgba_neon(d1 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
+    PROFILE_ACC_N(prof_no);
+
+    for (i = 0; i != BUF_SIZE + BUF_SLACK*2; ++i) {
+        if (d0[i] != d1[i]) {
+            printf("%3d: %08x + %08x * %02x: %08x / %08x: len=%d\n", (int)(i - BUF_SLACK), dx[i], s0[i], alpha, d0[i], d1[i], len);
+        }
+    }
+}
+
+
+
+int main(int argc, char *argv[])
+{
+    unsigned int i, j;
+    uint32_t d0_buf[BUF_ALLOC];
+    uint32_t s0_buf[BUF_ALLOC];
+
+    uint32_t * const d0 = (uint32_t *)(((uintptr_t)d0_buf + 63) & ~63) + 0;
+    uint32_t * const s0 = (uint32_t *)(((uintptr_t)s0_buf + 63) & ~63) + 0;
+
+    PROFILE_INIT();
+
+    for (i = 0; i != 255*255; ++i) {
+        unsigned int a = div255(i);
+        unsigned int b = (i + 127)/255;
+        unsigned int c = (i + 126)/255;
+        if (a != b && a != c)
+            printf("%d/255: %d != %d/%d\n", i, a, b, c);
+    }
+
+    for (i = 0; i != BUF_ALLOC; ++i) {
+        d0_buf[i] = 0xff00 | i;
+        s0_buf[i] = (i << 24) | 0x40ffc0;
+    }
+
+    for (i = 0; i != 256; ++i) {
+        test_line(d0, 0, s0, 0, i, 256, -1);
+    }
+    for (i = 0; i != 256; ++i) {
+        test_line(d0, 0, s0, 0, 128, i, -1);
+    }
+
+    for (j = 0; j != 16; ++j) {
+        for (i = 0; i != 256; ++i) {
+            test_line(d0, j & 3, s0, j >> 2, i, 256, j);
+        }
+        PROFILE_PRINTF_N(j);
+        PROFILE_CLEAR_N(j);
+    }
+    printf("Done 1\n");
+
+    for (i = 0; i != 256; ++i) {
+        test_line2(d0, 0, s0, 0, i, 256, -1);
+    }
+    for (i = 0; i != 256; ++i) {
+        test_line2(d0, 0, s0, 0, 128, i, -1);
+    }
+
+    for (j = 0; j != 16; ++j) {
+        for (i = 0; i != 256; ++i) {
+            test_line2(d0, j & 3, s0, j >> 2, i, 256, j);
+        }
+        PROFILE_PRINTF_N(j);
+    }
+    printf("Done 2\n");
+
+    return 0;
+}
+
--- a/modules/hw/mmal/codec.c
+++ b/modules/hw/mmal/codec.c
@@ -26,267 +26,396 @@
 #include "config.h"
 #endif
 
+#include <stdatomic.h>
+
 #include <vlc_common.h>
-#include <vlc_atomic.h>
 #include <vlc_plugin.h>
 #include <vlc_codec.h>
+#include <vlc_filter.h>
 #include <vlc_threads.h>
 
-#include <bcm_host.h>
 #include <interface/mmal/mmal.h>
 #include <interface/mmal/util/mmal_util.h>
 #include <interface/mmal/util/mmal_default_components.h>
 
+#include <interface/vcsm/user-vcsm.h>
+
+#include "mmal_cma.h"
 #include "mmal_picture.h"
 
+#include "subpic.h"
+#include "blend_rgba_neon.h"
+
+#define TRACE_ALL 0
+
+#define OPT_TO_FROM_ZC 0
+
 /*
  * This seems to be a bit high, but reducing it causes instabilities
  */
 #define NUM_EXTRA_BUFFERS 5
+//#define NUM_EXTRA_BUFFERS 10
 #define NUM_DECODER_BUFFER_HEADERS 30
 
-#define MIN_NUM_BUFFERS_IN_TRANSIT 2
+#define CONVERTER_BUFFERS 4  // Buffers on the output of the converter
+
+#define MMAL_SLICE_HEIGHT 16
+#define MMAL_ALIGN_W      32
+#define MMAL_ALIGN_H      16
 
 #define MMAL_OPAQUE_NAME "mmal-opaque"
 #define MMAL_OPAQUE_TEXT N_("Decode frames directly into RPI VideoCore instead of host memory.")
 #define MMAL_OPAQUE_LONGTEXT N_("Decode frames directly into RPI VideoCore instead of host memory. This option must only be used with the MMAL video output plugin.")
 
-static int OpenDecoder(decoder_t *dec);
-static void CloseDecoder(decoder_t *dec);
+#define MMAL_RESIZE_NAME "mmal-resize"
+#define MMAL_RESIZE_TEXT N_("Use mmal resizer rather than hvs.")
+#define MMAL_RESIZE_LONGTEXT N_("Use mmal resizer rather than isp. This uses less gpu memory than the ISP but is slower.")
+
+#define MMAL_ISP_NAME "mmal-isp"
+#define MMAL_ISP_TEXT N_("Use mmal isp rather than hvs.")
+#define MMAL_ISP_LONGTEXT N_("Use mmal isp rather than hvs. This may be faster but has no blend.")
 
-vlc_module_begin()
-    set_shortname(N_("MMAL decoder"))
-    set_description(N_("MMAL-based decoder plugin for Raspberry Pi"))
-    set_capability("video decoder", 90)
-    add_shortcut("mmal_decoder")
-    add_bool(MMAL_OPAQUE_NAME, true, MMAL_OPAQUE_TEXT, MMAL_OPAQUE_LONGTEXT, false)
-    set_callbacks(OpenDecoder, CloseDecoder)
-vlc_module_end()
-
-struct decoder_sys_t {
-    bool opaque;
+typedef struct decoder_sys_t
+{
     MMAL_COMPONENT_T *component;
     MMAL_PORT_T *input;
     MMAL_POOL_T *input_pool;
     MMAL_PORT_T *output;
-    MMAL_POOL_T *output_pool; /* only used for non-opaque mode */
+    hw_mmal_port_pool_ref_t *ppr;
     MMAL_ES_FORMAT_T *output_format;
-    vlc_sem_t sem;
 
+    MMAL_STATUS_T err_stream;
     bool b_top_field_first;
     bool b_progressive;
 
+    bool b_flushed;
+
+    vcsm_init_type_t vcsm_init_type;
+
+    // Lock to avoid pic update & allocate happenening simultainiously
+    // * We should be able to arrange life s.t. this isn't needed
+    //   but while we are confused apply belt & braces
+    vlc_mutex_t pic_lock;
+
     /* statistics */
-    int output_in_transit;
-    int input_in_transit;
     atomic_bool started;
-};
+} decoder_sys_t;
 
-/* Utilities */
-static int change_output_format(decoder_t *dec);
-static int send_output_buffer(decoder_t *dec);
-static void fill_output_port(decoder_t *dec);
-
-/* VLC decoder callback */
-static int decode(decoder_t *dec, block_t *block);
-static void flush_decoder(decoder_t *dec);
-
-/* MMAL callbacks */
-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
 
-static int OpenDecoder(decoder_t *dec)
-{
-    int ret = VLC_SUCCESS;
-    decoder_sys_t *sys;
-    MMAL_PARAMETER_UINT32_T extra_buffers;
-    MMAL_STATUS_T status;
+typedef struct supported_mmal_enc_s {
+    struct {
+       MMAL_PARAMETER_HEADER_T header;
+       MMAL_FOURCC_T encodings[64];
+    } supported;
+    int n;
+} supported_mmal_enc_t;
+
+#define SUPPORTED_MMAL_ENC_INIT \
+{ \
+    {{MMAL_PARAMETER_SUPPORTED_ENCODINGS, sizeof(((supported_mmal_enc_t *)0)->supported)}, {0}}, \
+    -1 \
+}
 
-    if (dec->fmt_in.i_codec != VLC_CODEC_MPGV &&
-            dec->fmt_in.i_codec != VLC_CODEC_H264)
-        return VLC_EGENERIC;
+static supported_mmal_enc_t supported_decode_in_enc = SUPPORTED_MMAL_ENC_INIT;
 
-    sys = calloc(1, sizeof(decoder_sys_t));
-    if (!sys) {
-        ret = VLC_ENOMEM;
-        goto out;
+static bool is_enc_supported(supported_mmal_enc_t * const support, const MMAL_FOURCC_T fcc)
+{
+    int i;
+
+    if (fcc == 0)
+        return false;
+    if (support->n == -1)
+        return true;  // Unknown - say OK
+    for (i = 0; i < support->n; ++i) {
+        if (support->supported.encodings[i] == fcc)
+            return true;
     }
-    dec->p_sys = sys;
+    return false;
+}
 
-    sys->opaque = var_InheritBool(dec, MMAL_OPAQUE_NAME);
-    bcm_host_init();
+static bool set_and_test_enc_supported(supported_mmal_enc_t * const support, MMAL_PORT_T * port, const MMAL_FOURCC_T fcc)
+{
+    if (support->n >= 0)
+        /* already done */;
+    else if (mmal_port_parameter_get(port, (MMAL_PARAMETER_HEADER_T *)&support->supported) != MMAL_SUCCESS)
+        support->n = 0;
+    else
+        support->n = (support->supported.header.size - sizeof(support->supported.header)) /
+          sizeof(support->supported.encodings[0]);
 
-    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, &sys->component);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(dec, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
-                MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
-    }
+    return is_enc_supported(support, fcc);
+}
 
-    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
-    status = mmal_port_enable(sys->component->control, control_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(dec, "Failed to enable control port %s (status=%"PRIx32" %s)",
-                sys->component->control->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+static MMAL_FOURCC_T vlc_to_mmal_es_fourcc(const unsigned int fcc)
+{
+    switch (fcc){
+    case VLC_CODEC_MJPG:
+        return MMAL_ENCODING_MJPEG;
+    case VLC_CODEC_MP1V:
+        return MMAL_ENCODING_MP1V;
+    case VLC_CODEC_MPGV:
+    case VLC_CODEC_MP2V:
+        return MMAL_ENCODING_MP2V;
+    case VLC_CODEC_H263:
+        return MMAL_ENCODING_H263;
+    case VLC_CODEC_MP4V:
+        return MMAL_ENCODING_MP4V;
+    case VLC_CODEC_H264:
+        return MMAL_ENCODING_H264;
+    case VLC_CODEC_VP6:
+        return MMAL_ENCODING_VP6;
+    case VLC_CODEC_VP8:
+        return MMAL_ENCODING_VP8;
+    case VLC_CODEC_WMV1:
+        return MMAL_ENCODING_WMV1;
+    case VLC_CODEC_WMV2:
+        return MMAL_ENCODING_WMV2;
+    case VLC_CODEC_WMV3:
+        return MMAL_ENCODING_WMV3;
+    case VLC_CODEC_VC1:
+        return MMAL_ENCODING_WVC1;
+    case VLC_CODEC_THEORA:
+        return MMAL_ENCODING_THEORA;
+    default:
+        break;
     }
+    return 0;
+}
 
-    sys->input = sys->component->input[0];
-    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
-    if (dec->fmt_in.i_codec == VLC_CODEC_MPGV)
-        sys->input->format->encoding = MMAL_ENCODING_MP2V;
-    else
-        sys->input->format->encoding = MMAL_ENCODING_H264;
+static MMAL_FOURCC_T pic_to_slice_mmal_fourcc(const MMAL_FOURCC_T fcc)
+{
+    switch (fcc){
+    case MMAL_ENCODING_I420:
+        return MMAL_ENCODING_I420_SLICE;
+    case MMAL_ENCODING_I422:
+        return MMAL_ENCODING_I422_SLICE;
+    case MMAL_ENCODING_ARGB:
+        return MMAL_ENCODING_ARGB_SLICE;
+    case MMAL_ENCODING_RGBA:
+        return MMAL_ENCODING_RGBA_SLICE;
+    case MMAL_ENCODING_ABGR:
+        return MMAL_ENCODING_ABGR_SLICE;
+    case MMAL_ENCODING_BGRA:
+        return MMAL_ENCODING_BGRA_SLICE;
+    case MMAL_ENCODING_RGB16:
+        return MMAL_ENCODING_RGB16_SLICE;
+    case MMAL_ENCODING_RGB24:
+        return MMAL_ENCODING_RGB24_SLICE;
+    case MMAL_ENCODING_RGB32:
+        return MMAL_ENCODING_RGB32_SLICE;
+    case MMAL_ENCODING_BGR16:
+        return MMAL_ENCODING_BGR16_SLICE;
+    case MMAL_ENCODING_BGR24:
+        return MMAL_ENCODING_BGR24_SLICE;
+    case MMAL_ENCODING_BGR32:
+        return MMAL_ENCODING_BGR32_SLICE;
+    default:
+        break;
+    }
+    return 0;
+}
 
-    if (dec->fmt_in.i_codec == VLC_CODEC_H264) {
-        if (dec->fmt_in.i_extra > 0) {
-            status = mmal_format_extradata_alloc(sys->input->format,
-                    dec->fmt_in.i_extra);
-            if (status == MMAL_SUCCESS) {
-                memcpy(sys->input->format->extradata, dec->fmt_in.p_extra,
-                        dec->fmt_in.i_extra);
-                sys->input->format->extradata_size = dec->fmt_in.i_extra;
-            } else {
-                msg_Err(dec, "Failed to allocate extra format data on input port %s (status=%"PRIx32" %s)",
-                        sys->input->name, status, mmal_status_to_string(status));
-            }
+#define DEBUG_SQUARES 0
+#if DEBUG_SQUARES
+static void draw_square(void * pic_buf, size_t pic_stride, unsigned int x, unsigned int y, unsigned int w, unsigned int h, uint32_t val)
+{
+    uint32_t * p = (uint32_t *)pic_buf + y * pic_stride + x;
+    unsigned int i;
+    for (i = 0; i != h; ++i) {
+        unsigned int j;
+        for (j = 0; j != w; ++j) {
+            p[j] = val;
         }
+        p += pic_stride;
     }
+}
+#endif
 
-    status = mmal_port_format_commit(sys->input);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(dec, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
-                sys->input->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+#if 0
+static inline void draw_line(void * pic_buf, size_t pic_stride, unsigned int x, unsigned int y, unsigned int len, int inc)
+{
+    uint32_t * p = (uint32_t *)pic_buf + y * pic_stride + x;
+    while (len-- != 0) {
+        *p = ~0U;
+        p += inc;
     }
-    sys->input->buffer_size = sys->input->buffer_size_recommended;
-    sys->input->buffer_num = sys->input->buffer_num_recommended;
+}
 
-    status = mmal_port_enable(sys->input, input_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(dec, "Failed to enable input port %s (status=%"PRIx32" %s)",
-                sys->input->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
-    }
 
-    sys->output = sys->component->output[0];
-    sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
+static void draw_corners(void * pic_buf, size_t pic_stride, unsigned int x, unsigned int y, unsigned int w, unsigned int h)
+{
+    const unsigned int len = 20;
+    draw_line(pic_buf, pic_stride, x, y, len, 1);
+    draw_line(pic_buf, pic_stride, x, y, len, pic_stride);
+    draw_line(pic_buf, pic_stride, x + w - 1, y, len, -1);
+    draw_line(pic_buf, pic_stride, x + w - 1, y, len, pic_stride);
+    draw_line(pic_buf, pic_stride, x + w - 1, y + h - 1, len, -1);
+    draw_line(pic_buf, pic_stride, x + w - 1, y + h - 1, len, -(int)pic_stride);
+    draw_line(pic_buf, pic_stride, x, y + h - 1, len, 1);
+    draw_line(pic_buf, pic_stride, x, y + h - 1, len, -(int)pic_stride);
+}
+#endif
 
-    if (sys->opaque) {
-        extra_buffers.hdr.id = MMAL_PARAMETER_EXTRA_BUFFERS;
-        extra_buffers.hdr.size = sizeof(MMAL_PARAMETER_UINT32_T);
-        extra_buffers.value = NUM_EXTRA_BUFFERS;
-        status = mmal_port_parameter_set(sys->output, &extra_buffers.hdr);
-        if (status != MMAL_SUCCESS) {
-            msg_Err(dec, "Failed to set MMAL_PARAMETER_EXTRA_BUFFERS on output port (status=%"PRIx32" %s)",
-                    status, mmal_status_to_string(status));
-            ret = VLC_EGENERIC;
-            goto out;
-        }
+// Buffer either attached to pic or released
+static picture_t * alloc_opaque_pic(decoder_t * const dec, MMAL_BUFFER_HEADER_T * const buf)
+{
+    decoder_sys_t *const dec_sys = dec->p_sys;
 
-        msg_Dbg(dec, "Activate zero-copy for output port");
-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
-            1
-        };
+    vlc_mutex_lock(&dec_sys->pic_lock);
+    picture_t * const pic = decoder_NewPicture(dec);
+    vlc_mutex_unlock(&dec_sys->pic_lock);
 
-        status = mmal_port_parameter_set(sys->output, &zero_copy.hdr);
-        if (status != MMAL_SUCCESS) {
-           msg_Err(dec, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
-                    sys->output->name, status, mmal_status_to_string(status));
-           goto out;
-        }
-    }
+    if (pic == NULL)
+        goto fail1;
 
-    status = mmal_port_enable(sys->output, output_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(dec, "Failed to enable output port %s (status=%"PRIx32" %s)",
-                sys->output->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+    if (buf->length == 0) {
+        msg_Err(dec, "%s: Empty buffer", __func__);
+        goto fail2;
     }
 
-    status = mmal_component_enable(sys->component);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(dec, "Failed to enable component %s (status=%"PRIx32" %s)",
-                sys->component->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
-    }
+    if ((pic->context = hw_mmal_gen_context(buf, dec_sys->ppr)) == NULL)
+        goto fail2;
 
-    sys->input_pool = mmal_pool_create(sys->input->buffer_num, 0);
+    buf_to_pic_copy_props(pic, buf);
 
-    if (sys->opaque) {
-        dec->fmt_out.i_codec = VLC_CODEC_MMAL_OPAQUE;
-        dec->fmt_out.video.i_chroma = VLC_CODEC_MMAL_OPAQUE;
-    } else {
-        dec->fmt_out.i_codec = VLC_CODEC_I420;
-        dec->fmt_out.video.i_chroma = VLC_CODEC_I420;
+#if TRACE_ALL
+    msg_Dbg(dec, "pic: prog=%d, tff=%d, date=%lld", pic->b_progressive, pic->b_top_field_first, (long long)pic->date);
+#endif
+
+    return pic;
+
+fail2:
+    picture_Release(pic);
+fail1:
+    // Recycle rather than release to avoid buffer starvation if NewPic fails
+    hw_mmal_port_pool_ref_recycle(dec_sys->ppr, buf);
+    return NULL;
+}
+
+static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    decoder_t *dec = (decoder_t *)port->userdata;
+    MMAL_STATUS_T status;
+
+#if TRACE_ALL
+    msg_Dbg(dec, "<<< %s: cmd=%d, data=%p", __func__, buffer->cmd, buffer->data);
+#endif
+
+    if (buffer->cmd == MMAL_EVENT_ERROR) {
+        status = *(uint32_t *)buffer->data;
+        dec->p_sys->err_stream = status;
+        msg_Err(dec, "MMAL error %"PRIx32" \"%s\"", status,
+                mmal_status_to_string(status));
     }
 
-    dec->pf_decode = decode;
-    dec->pf_flush  = flush_decoder;
+    mmal_buffer_header_release(buffer);
+}
 
-    vlc_sem_init(&sys->sem, 0);
+static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    block_t * const block = (block_t *)buffer->user_data;
 
-out:
-    if (ret != VLC_SUCCESS)
-        CloseDecoder(dec);
+    (void)port;  // Unused
 
-    return ret;
+#if TRACE_ALL
+    msg_Dbg((decoder_t *)port->userdata, "<<< %s: cmd=%d, data=%p, len=%d/%d, pts=%lld", __func__,
+            buffer->cmd, buffer->data, buffer->length, buffer->alloc_size, (long long)buffer->pts);
+#endif
+
+    mmal_buffer_header_reset(buffer);
+    mmal_buffer_header_release(buffer);
+
+    if (block != NULL)
+        block_Release(block);
 }
 
-static void CloseDecoder(decoder_t *dec)
+static void decoder_output_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
 {
-    decoder_sys_t *sys = dec->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer;
+    decoder_t * const dec = (decoder_t *)port->userdata;
 
-    if (!sys)
+    if (buffer->cmd == 0 && buffer->length != 0)
+    {
+#if TRACE_ALL
+        msg_Dbg(dec, "<<< %s: cmd=%d, data=%p, len=%d/%d, pts=%lld", __func__,
+                buffer->cmd, buffer->data, buffer->length, buffer->alloc_size, (long long)buffer->pts);
+#endif
+
+        picture_t *pic = alloc_opaque_pic(dec, buffer);
+#if TRACE_ALL
+        msg_Dbg(dec, "flags=%#x, video flags=%#x", buffer->flags, buffer->type->video.flags);
+#endif
+        if (pic == NULL)
+            msg_Err(dec, "Failed to allocate new picture");
+        else
+            decoder_QueueVideo(dec, pic);
+        // Buffer released or attached to pic - do not release again
         return;
+    }
 
-    if (sys->component && sys->component->control->is_enabled)
-        mmal_port_disable(sys->component->control);
+    if (buffer->cmd == MMAL_EVENT_FORMAT_CHANGED)
+    {
+        decoder_sys_t * const sys = dec->p_sys;
+        MMAL_EVENT_FORMAT_CHANGED_T * const fmt = mmal_event_format_changed_get(buffer);
+        MMAL_ES_FORMAT_T * const format = mmal_format_alloc();
 
-    if (sys->input && sys->input->is_enabled)
-        mmal_port_disable(sys->input);
+        if (format == NULL)
+            msg_Err(dec, "Failed to allocate new format");
+        else
+        {
+            mmal_format_full_copy(format, fmt->format);
+            format->encoding = MMAL_ENCODING_OPAQUE;
 
-    if (sys->output && sys->output->is_enabled)
-        mmal_port_disable(sys->output);
+            if (sys->output_format != NULL)
+                mmal_format_free(sys->output_format);
 
-    if (sys->component && sys->component->is_enabled)
-        mmal_component_disable(sys->component);
+            sys->output_format = format;
+        }
+    }
+    else if (buffer->cmd != 0) {
+        char buf0[5];
+        msg_Warn(dec, "Unexpected output cb event: %s", str_fourcc(buf0, buffer->cmd));
+    }
 
-    if (sys->input_pool)
-        mmal_pool_destroy(sys->input_pool);
+    // If we get here then we were flushing (cmd == 0 && len == 0) or
+    // that was an EVENT - in either case we want to release the buffer
+    // back to its pool rather than recycle it.
+    mmal_buffer_header_reset(buffer);
+    buffer->user_data = NULL;
+    mmal_buffer_header_release(buffer);
+}
 
-    if (sys->output_format)
-        mmal_format_free(sys->output_format);
 
-    if (sys->output_pool)
-        mmal_pool_destroy(sys->output_pool);
 
-    if (sys->component)
-        mmal_component_release(sys->component);
+static void fill_output_port(decoder_t *dec)
+{
+    decoder_sys_t *sys = dec->p_sys;
 
-    vlc_sem_destroy(&sys->sem);
-    free(sys);
+    if (decoder_UpdateVideoFormat(dec) != 0)
+    {
+        // If we have a new format don't bother stuffing the buffer
+        // We should get a reset RSN
+#if TRACE_ALL
+        msg_Dbg(dec, "%s: Updated", __func__);
+#endif
 
-    bcm_host_deinit();
+        return;
+    }
+
+    hw_mmal_port_pool_ref_fill(sys->ppr);
+    return;
 }
 
 static int change_output_format(decoder_t *dec)
 {
     MMAL_PARAMETER_VIDEO_INTERLACE_TYPE_T interlace_type;
-    decoder_sys_t *sys = dec->p_sys;
+    decoder_sys_t * const sys = dec->p_sys;
     MMAL_STATUS_T status;
-    int pool_size;
     int ret = 0;
 
+#if TRACE_ALL
+    msg_Dbg(dec, "%s: <<<", __func__);
+#endif
+
     if (atomic_load(&sys->started)) {
         mmal_format_full_copy(sys->output->format, sys->output_format);
         status = mmal_port_format_commit(sys->output);
@@ -300,7 +429,9 @@
     }
 
 port_reset:
+#if TRACE_ALL
     msg_Dbg(dec, "%s: Do full port reset", __func__);
+#endif
     status = mmal_port_disable(sys->output);
     if (status != MMAL_SUCCESS) {
         msg_Err(dec, "Failed to disable output port (status=%"PRIx32" %s)",
@@ -318,18 +449,10 @@
         goto out;
     }
 
-    if (sys->opaque) {
-        sys->output->buffer_num = NUM_DECODER_BUFFER_HEADERS;
-        pool_size = NUM_DECODER_BUFFER_HEADERS;
-    } else {
-        sys->output->buffer_num = __MAX(sys->output->buffer_num_recommended,
-                MIN_NUM_BUFFERS_IN_TRANSIT);
-        pool_size = sys->output->buffer_num;
-    }
-
+    sys->output->buffer_num = NUM_DECODER_BUFFER_HEADERS;
     sys->output->buffer_size = sys->output->buffer_size_recommended;
 
-    status = mmal_port_enable(sys->output, output_port_cb);
+    status = mmal_port_enable(sys->output, decoder_output_cb);
     if (status != MMAL_SUCCESS) {
         msg_Err(dec, "Failed to enable output port (status=%"PRIx32" %s)",
                 status, mmal_status_to_string(status));
@@ -338,25 +461,14 @@
     }
 
     if (!atomic_load(&sys->started)) {
-        if (!sys->opaque) {
-            sys->output_pool = mmal_port_pool_create(sys->output, pool_size, 0);
-            msg_Dbg(dec, "Created output pool with %d pictures", sys->output_pool->headers_num);
-        }
-
         atomic_store(&sys->started, true);
 
         /* we need one picture from vout for each buffer header on the output
          * port */
-        dec->i_extra_picture_buffers = pool_size;
-
-        /* remove what VLC core reserves as it is part of the pool_size
-         * already */
-        if (dec->fmt_in.i_codec == VLC_CODEC_H264)
-            dec->i_extra_picture_buffers -= 19;
-        else
-            dec->i_extra_picture_buffers -= 3;
-
+        dec->i_extra_picture_buffers = 10;
+#if TRACE_ALL
         msg_Dbg(dec, "Request %d extra pictures", dec->i_extra_picture_buffers);
+#endif
     }
 
 apply_fmt:
@@ -382,12 +494,19 @@
         sys->b_progressive = (interlace_type.eMode == MMAL_InterlaceProgressive);
         sys->b_top_field_first = sys->b_progressive ? true :
             (interlace_type.eMode == MMAL_InterlaceFieldsInterleavedUpperFirst);
+#if TRACE_ALL
         msg_Dbg(dec, "Detected %s%s video (%d)",
                 sys->b_progressive ? "progressive" : "interlaced",
                 sys->b_progressive ? "" : (sys->b_top_field_first ? " tff" : " bff"),
                 interlace_type.eMode);
+#endif
     }
 
+    // Tell the rest of the world we have changed format
+    vlc_mutex_lock(&sys->pic_lock);
+    ret = decoder_UpdateVideoFormat(dec);
+    vlc_mutex_unlock(&sys->pic_lock);
+
 out:
     mmal_format_free(sys->output_format);
     sys->output_format = NULL;
@@ -395,144 +514,85 @@
     return ret;
 }
 
-static int send_output_buffer(decoder_t *dec)
+static MMAL_STATUS_T
+set_extradata_and_commit(decoder_t * const dec, decoder_sys_t * const sys)
 {
-    decoder_sys_t *sys = dec->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer;
-    picture_sys_t *p_sys;
-    picture_t *picture = NULL;
     MMAL_STATUS_T status;
-    unsigned buffer_size = 0;
-    int ret = 0;
-
-    if (!sys->output->is_enabled)
-        return VLC_EGENERIC;
-
-    /* If local output pool is allocated, use it - this is only the case for
-     * non-opaque modes */
-    if (sys->output_pool) {
-        buffer = mmal_queue_get(sys->output_pool->queue);
-        if (!buffer) {
-            msg_Warn(dec, "Failed to get new buffer");
-            return VLC_EGENERIC;
-        }
-    }
-
-    if (!decoder_UpdateVideoFormat(dec))
-        picture = decoder_NewPicture(dec);
-    if (!picture) {
-        msg_Warn(dec, "Failed to get new picture");
-        ret = -1;
-        goto err;
-    }
-
-    p_sys = picture->p_sys;
-    for (int i = 0; i < picture->i_planes; i++)
-        buffer_size += picture->p[i].i_lines * picture->p[i].i_pitch;
-
-    if (sys->output_pool) {
-        mmal_buffer_header_reset(buffer);
-        buffer->alloc_size = sys->output->buffer_size;
-        if (buffer_size < sys->output->buffer_size) {
-            msg_Err(dec, "Retrieved picture with too small data block (%d < %d)",
-                    buffer_size, sys->output->buffer_size);
-            ret = VLC_EGENERIC;
-            goto err;
-        }
-
-        if (!sys->opaque)
-            buffer->data = picture->p[0].p_pixels;
-    } else {
-        buffer = p_sys->buffer;
-        if (!buffer) {
-            msg_Warn(dec, "Picture has no buffer attached");
-            picture_Release(picture);
-            return VLC_EGENERIC;
-        }
-        buffer->data = p_sys->buffer->data;
-    }
-    buffer->user_data = picture;
-    buffer->cmd = 0;
 
-    status = mmal_port_send_buffer(sys->output, buffer);
+    status = mmal_port_format_commit(sys->input);
     if (status != MMAL_SUCCESS) {
-        msg_Err(dec, "Failed to send buffer to output port (status=%"PRIx32" %s)",
-                status, mmal_status_to_string(status));
-        ret = -1;
-        goto err;
-    }
-    atomic_fetch_add(&sys->output_in_transit, 1);
-
-    return ret;
-
-err:
-    if (picture)
-        picture_Release(picture);
-    if (sys->output_pool && buffer) {
-        buffer->data = NULL;
-        mmal_buffer_header_release(buffer);
+        msg_Err(dec, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
+                sys->input->name, status, mmal_status_to_string(status));
     }
-    return ret;
+    return status;
 }
 
-static void fill_output_port(decoder_t *dec)
+static MMAL_STATUS_T decoder_send_extradata(decoder_t * const dec, decoder_sys_t *const sys)
 {
-    decoder_sys_t *sys = dec->p_sys;
+    if (dec->fmt_in.i_codec == VLC_CODEC_H264 &&
+        dec->fmt_in.i_extra > 0)
+    {
+        MMAL_BUFFER_HEADER_T * const buf = mmal_queue_wait(sys->input_pool->queue);
+        MMAL_STATUS_T status;
+
+        mmal_buffer_header_reset(buf);
+        buf->cmd = 0;
+        buf->user_data = NULL;
+        buf->alloc_size = sys->input->buffer_size;
+        buf->length = dec->fmt_in.i_extra;
+        buf->data = dec->fmt_in.p_extra;
+        buf->flags = MMAL_BUFFER_HEADER_FLAG_CONFIG;
 
-    unsigned max_buffers_in_transit = 0;
-    int buffers_available = 0;
-    int buffers_to_send = 0;
-    int i;
-
-    if (sys->output_pool) {
-        max_buffers_in_transit = __MAX(sys->output_pool->headers_num,
-                MIN_NUM_BUFFERS_IN_TRANSIT);
-        buffers_available = mmal_queue_length(sys->output_pool->queue);
-    } else {
-        max_buffers_in_transit = NUM_DECODER_BUFFER_HEADERS;
-        buffers_available = NUM_DECODER_BUFFER_HEADERS - atomic_load(&sys->output_in_transit);
+        status = mmal_port_send_buffer(sys->input, buf);
+        if (status != MMAL_SUCCESS) {
+            msg_Err(dec, "Failed to send extradata buffer to input port (status=%"PRIx32" %s)",
+                    status, mmal_status_to_string(status));
+            return status;
+        }
     }
-    buffers_to_send = max_buffers_in_transit - atomic_load(&sys->output_in_transit);
 
-    if (buffers_to_send > buffers_available)
-        buffers_to_send = buffers_available;
-
-#ifndef NDEBUG
-    msg_Dbg(dec, "Send %d buffers to output port (available: %d, "
-                    "in_transit: %d, buffer_num: %d)",
-                    buffers_to_send, buffers_available,
-                    atomic_load(&sys->output_in_transit),
-                    sys->output->buffer_num);
-#endif
-    for (i = 0; i < buffers_to_send; ++i)
-        if (send_output_buffer(dec) < 0)
-            break;
+    return MMAL_SUCCESS;
 }
 
 static void flush_decoder(decoder_t *dec)
 {
-    decoder_sys_t *sys = dec->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer;
-    MMAL_STATUS_T status;
+    decoder_sys_t *const sys = dec->p_sys;
+
+#if TRACE_ALL
+    msg_Dbg(dec, "%s: <<<", __func__);
+#endif
 
-    msg_Dbg(dec, "Flushing decoder ports...");
-    mmal_port_flush(sys->output);
-    mmal_port_flush(sys->input);
-
-    while (atomic_load(&sys->output_in_transit) ||
-           atomic_load(&sys->input_in_transit))
-        vlc_sem_wait(&sys->sem);
+    if (!sys->b_flushed) {
+        mmal_port_disable(sys->input);
+        mmal_port_disable(sys->output);
+        // We can leave the input disabled, but we want the output enabled
+        // in order to sink any buffers returning from other modules
+        mmal_port_enable(sys->output, decoder_output_cb);
+        sys->b_flushed = true;
+    }
+#if TRACE_ALL
+    msg_Dbg(dec, "%s: >>>", __func__);
+#endif
 }
 
 static int decode(decoder_t *dec, block_t *block)
 {
     decoder_sys_t *sys = dec->p_sys;
     MMAL_BUFFER_HEADER_T *buffer;
-    bool need_flush = false;
     uint32_t len;
     uint32_t flags = 0;
     MMAL_STATUS_T status;
 
+#if TRACE_ALL
+    msg_Dbg(dec, "<<< %s: %lld/%lld", __func__, block == NULL ? -1LL : block->i_dts, block == NULL ? -1LL : block->i_pts);
+#endif
+
+    if (sys->err_stream != MMAL_SUCCESS) {
+        msg_Err(dec, "MMAL error reported by ctrl");
+        flush_decoder(dec);
+        return VLCDEC_ECRITICAL;  /// I think they are all fatal
+    }
+
     /*
      * Configure output port if necessary
      */
@@ -541,18 +601,50 @@
             msg_Err(dec, "Failed to change output port format");
     }
 
-    if (!block)
-        goto out;
+    if (block == NULL)
+        return VLCDEC_SUCCESS;
 
     /*
      * Check whether full flush is required
      */
-    if (block && block->i_flags & BLOCK_FLAG_DISCONTINUITY) {
+    if (block->i_flags & BLOCK_FLAG_DISCONTINUITY) {
+#if TRACE_ALL
+        msg_Dbg(dec, "%s: >>> Discontinuity", __func__);
+#endif
         flush_decoder(dec);
+    }
+
+    if (block->i_buffer == 0)
+    {
         block_Release(block);
         return VLCDEC_SUCCESS;
     }
 
+    // Reenable stuff if the last thing we did was flush
+    if (!sys->output->is_enabled &&
+        (status = mmal_port_enable(sys->output, decoder_output_cb)) != MMAL_SUCCESS)
+    {
+        msg_Err(dec, "Output port enable failed");
+        goto fail;
+    }
+
+    if (!sys->input->is_enabled)
+    {
+        if ((status = set_extradata_and_commit(dec, sys)) != MMAL_SUCCESS)
+            goto fail;
+
+        if ((status = mmal_port_enable(sys->input, input_port_cb)) != MMAL_SUCCESS)
+        {
+            msg_Err(dec, "Input port enable failed");
+            goto fail;
+        }
+
+        if ((status = decoder_send_extradata(dec, sys)) != MMAL_SUCCESS)
+            goto fail;
+    }
+
+    // *** We cannot get a picture to put the result in 'till we have
+    // reported the size & the output stages have been set up
     if (atomic_load(&sys->started))
         fill_output_port(dec);
 
@@ -563,18 +655,21 @@
     if (block->i_flags & BLOCK_FLAG_CORRUPTED)
         flags |= MMAL_BUFFER_HEADER_FLAG_CORRUPTED;
 
-    while (block && block->i_buffer > 0) {
-        buffer = mmal_queue_timedwait(sys->input_pool->queue, 100);
+    while (block != NULL)
+    {
+        buffer = mmal_queue_wait(sys->input_pool->queue);
         if (!buffer) {
             msg_Err(dec, "Failed to retrieve buffer header for input data");
-            need_flush = true;
-            break;
+            goto fail;
         }
+
         mmal_buffer_header_reset(buffer);
         buffer->cmd = 0;
-        buffer->pts = block->i_pts != 0 ? block->i_pts : block->i_dts;
+        buffer->pts = block->i_pts != VLC_TICK_INVALID ? block->i_pts :
+            block->i_dts != VLC_TICK_INVALID ? block->i_dts : MMAL_TIME_UNKNOWN;
         buffer->dts = block->i_dts;
         buffer->alloc_size = sys->input->buffer_size;
+        buffer->user_data = NULL;
 
         len = block->i_buffer;
         if (len > buffer->alloc_size)
@@ -590,89 +685,1733 @@
         }
         buffer->flags = flags;
 
+#if TRACE_ALL
+        msg_Dbg(dec, "%s: -- Send buffer: len=%d", __func__, len);
+#endif
         status = mmal_port_send_buffer(sys->input, buffer);
         if (status != MMAL_SUCCESS) {
             msg_Err(dec, "Failed to send buffer to input port (status=%"PRIx32" %s)",
                     status, mmal_status_to_string(status));
-            break;
+            goto fail;
         }
-        atomic_fetch_add(&sys->input_in_transit, 1);
+
+        // Reset flushed flag once we have sent a buf
+        sys->b_flushed = false;
     }
+    return VLCDEC_SUCCESS;
 
-out:
-    if (need_flush)
-        flush_decoder(dec);
+fail:
+    flush_decoder(dec);
+    return VLCDEC_ECRITICAL;
 
-    return VLCDEC_SUCCESS;
 }
 
-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+
+static void CloseDecoder(decoder_t *dec)
 {
-    decoder_t *dec = (decoder_t *)port->userdata;
+    decoder_sys_t *sys = dec->p_sys;
+
+#if TRACE_ALL
+    msg_Dbg(dec, "%s: <<<", __func__);
+#endif
+
+    if (!sys)
+        return;
+
+    if (sys->component != NULL) {
+        if (sys->input->is_enabled)
+            mmal_port_disable(sys->input);
+
+        if (sys->output->is_enabled)
+            mmal_port_disable(sys->output);
+
+        if (sys->component->control->is_enabled)
+            mmal_port_disable(sys->component->control);
+
+        if (sys->component->is_enabled)
+            mmal_component_disable(sys->component);
+
+        mmal_component_release(sys->component);
+    }
+
+    if (sys->input_pool != NULL)
+        mmal_pool_destroy(sys->input_pool);
+
+    if (sys->output_format != NULL)
+        mmal_format_free(sys->output_format);
+
+    hw_mmal_port_pool_ref_release(sys->ppr, false);
+
+    cma_vcsm_exit(sys->vcsm_init_type);
+
+    vlc_mutex_destroy(&sys->pic_lock);
+    free(sys);
+}
+
+static int OpenDecoder(decoder_t *dec)
+{
+    int ret = VLC_EGENERIC;
+    decoder_sys_t *sys;
     MMAL_STATUS_T status;
+    const MMAL_FOURCC_T in_fcc = vlc_to_mmal_es_fourcc(dec->fmt_in.i_codec);
+
+#if TRACE_ALL || 1
+    {
+        char buf1[5], buf2[5], buf2a[5];
+        char buf3[5], buf4[5];
+        msg_Dbg(dec, "%s: <<< (%s/%s)[%s] %dx%d -> (%s/%s) %dx%d", __func__,
+                str_fourcc(buf1, dec->fmt_in.i_codec),
+                str_fourcc(buf2, dec->fmt_in.video.i_chroma),
+                str_fourcc(buf2a, in_fcc),
+                dec->fmt_in.video.i_width, dec->fmt_in.video.i_height,
+                str_fourcc(buf3, dec->fmt_out.i_codec),
+                str_fourcc(buf4, dec->fmt_out.video.i_chroma),
+                dec->fmt_out.video.i_width, dec->fmt_out.video.i_height);
+    }
+#endif
+
+    if (!is_enc_supported(&supported_decode_in_enc, in_fcc))
+        return VLC_EGENERIC;
+
+    sys = calloc(1, sizeof(decoder_sys_t));
+    if (!sys) {
+        ret = VLC_ENOMEM;
+        goto fail;
+    }
+    dec->p_sys = sys;
+    vlc_mutex_init(&sys->pic_lock);
+
+    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
+        msg_Err(dec, "VCSM init failed");
+        goto fail;
+    }
+    msg_Info(dec, "VCSM init succeeded: %s", cma_vcsm_init_str(sys->vcsm_init_type));
+
+    sys->err_stream = MMAL_SUCCESS;
+
+    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, &sys->component);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(dec, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
+                MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    sys->input = sys->component->input[0];
+    sys->output = sys->component->output[0];
+
+    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
+    sys->input->format->encoding = in_fcc;
+
+    if (!set_and_test_enc_supported(&supported_decode_in_enc, sys->input, in_fcc)) {
+#if TRACE_ALL
+        char cbuf[5];
+        msg_Dbg(dec, "Format not supported: %s", str_fourcc(cbuf, in_fcc));
+#endif
+        goto fail;
+    }
+
+    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
+    status = mmal_port_enable(sys->component->control, control_port_cb);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(dec, "Failed to enable control port %s (status=%"PRIx32" %s)",
+                sys->component->control->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    if ((status = set_extradata_and_commit(dec, sys)) != MMAL_SUCCESS)
+        goto fail;
+
+    sys->input->buffer_size = sys->input->buffer_size_recommended;
+    sys->input->buffer_num = sys->input->buffer_num_recommended;
+
+    status = mmal_port_enable(sys->input, input_port_cb);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(dec, "Failed to enable input port %s (status=%"PRIx32" %s)",
+                sys->input->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    if ((status = hw_mmal_opaque_output(VLC_OBJECT(dec), &sys->ppr,
+                                        sys->output, NUM_EXTRA_BUFFERS, decoder_output_cb)) != MMAL_SUCCESS)
+        goto fail;
+
+    status = mmal_component_enable(sys->component);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(dec, "Failed to enable component %s (status=%"PRIx32" %s)",
+                sys->component->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    if ((sys->input_pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)
+    {
+        msg_Err(dec, "Failed to create input pool");
+        goto fail;
+    }
+
+    sys->b_flushed = true;
+    dec->fmt_out.i_codec = VLC_CODEC_MMAL_OPAQUE;
+    dec->fmt_out.video.i_chroma = VLC_CODEC_MMAL_OPAQUE;
+
+    if ((status = decoder_send_extradata(dec, sys)) != MMAL_SUCCESS)
+        goto fail;
+
+    dec->pf_decode = decode;
+    dec->pf_flush  = flush_decoder;
+
+#if TRACE_ALL
+    msg_Dbg(dec, ">>> %s: ok", __func__);
+#endif
+    return 0;
+
+fail:
+    CloseDecoder(dec);
+#if TRACE_ALL
+msg_Dbg(dec, ">>> %s: FAIL: ret=%d", __func__, ret);
+#endif
+    return ret;
+}
+
+// ----------------------------
+
+#define CONV_MAX_LATENCY 1  // In frames
+
+typedef struct pic_fifo_s {
+    picture_t * head;
+    picture_t * tail;
+} pic_fifo_t;
+
+static inline picture_t * pic_fifo_get(pic_fifo_t * const pf)
+{
+    picture_t * const pic = pf->head;;
+    if (pic != NULL) {
+        pf->head = pic->p_next;
+        pic->p_next = NULL;
+    }
+    return pic;
+}
+
+static inline picture_t * pic_fifo_get_all(pic_fifo_t * const pf)
+{
+    picture_t * const pic = pf->head;;
+    pf->head = NULL;
+    return pic;
+}
+
+static inline void pic_fifo_release_all(pic_fifo_t * const pf)
+{
+    picture_t * pic;
+    while ((pic = pic_fifo_get(pf)) != NULL) {
+        picture_Release(pic);
+    }
+}
+
+static inline void pic_fifo_init(pic_fifo_t * const pf)
+{
+    pf->head = NULL;
+    pf->tail = NULL;  // Not strictly needed
+}
+
+static inline void pic_fifo_put(pic_fifo_t * const pf, picture_t * pic)
+{
+    pic->p_next = NULL;
+    if (pf->head == NULL)
+        pf->head = pic;
+    else
+        pf->tail->p_next = pic;
+    pf->tail = pic;
+}
+
+#define SUBS_MAX 3
+
+typedef enum filter_resizer_e {
+    FILTER_RESIZER_RESIZER,
+    FILTER_RESIZER_ISP,
+    FILTER_RESIZER_HVS
+} filter_resizer_t;
+
+typedef struct conv_frame_stash_s
+{
+    mtime_t pts;
+    MMAL_BUFFER_HEADER_T * sub_bufs[SUBS_MAX];
+} conv_frame_stash_t;
+
+typedef struct filter_sys_t {
+    filter_resizer_t resizer_type;
+    MMAL_COMPONENT_T *component;
+    MMAL_PORT_T *input;
+    MMAL_PORT_T *output;
+    MMAL_POOL_T *out_pool;  // Free output buffers
+    MMAL_POOL_T *in_pool;   // Input pool to get BH for replication
+
+    cma_buf_pool_t * cma_in_pool;
+    cma_buf_pool_t * cma_out_pool;
+
+    subpic_reg_stash_t subs[SUBS_MAX];
+
+    pic_fifo_t ret_pics;
+
+    unsigned int pic_n;
+    vlc_sem_t sem;
+    vlc_mutex_t lock;
+
+    MMAL_STATUS_T err_stream;
+
+    bool needs_copy_in;
+    bool is_cma;
+    bool is_sliced;
+    bool out_fmt_set;
+    const char * component_name;
+    MMAL_PORT_BH_CB_T in_port_cb_fn;
+    MMAL_PORT_BH_CB_T out_port_cb_fn;
+
+    uint64_t frame_seq;
+    conv_frame_stash_t stash[16];
+
+    // Slice specific tracking stuff
+    struct {
+        pic_fifo_t pics;
+        unsigned int line;  // Lines filled
+    } slice;
+
+    vcsm_init_type_t vcsm_init_type;
+} filter_sys_t;
+
+
+static MMAL_STATUS_T pic_to_format(MMAL_ES_FORMAT_T * const es_fmt, const picture_t * const pic)
+{
+    unsigned int bpp = (pic->format.i_bits_per_pixel + 7) >> 3;
+    MMAL_VIDEO_FORMAT_T * const v_fmt = &es_fmt->es->video;
+
+    es_fmt->type = MMAL_ES_TYPE_VIDEO;
+    es_fmt->encoding = vlc_to_mmal_video_fourcc(&pic->format);
+    es_fmt->encoding_variant = 0;
+
+    // Fill in crop etc.
+    hw_mmal_vlc_fmt_to_mmal_fmt(es_fmt, &pic->format);
+    // Override width / height with strides if appropriate
+    if (bpp != 0) {
+        v_fmt->width = pic->p[0].i_pitch / bpp;
+        v_fmt->height = pic->p[0].i_lines;
+    }
+    return MMAL_SUCCESS;
+}
+
+
+static MMAL_STATUS_T conv_enable_in(filter_t * const p_filter, filter_sys_t * const sys)
+{
+    MMAL_STATUS_T err = MMAL_SUCCESS;
+
+    if (!sys->input->is_enabled &&
+        (err = mmal_port_enable(sys->input, sys->in_port_cb_fn)) != MMAL_SUCCESS)
+    {
+        msg_Err(p_filter, "Failed to enable input port %s (status=%"PRIx32" %s)",
+                sys->input->name, err, mmal_status_to_string(err));
+    }
+    return err;
+}
+
+static MMAL_STATUS_T conv_enable_out(filter_t * const p_filter, filter_sys_t * const sys)
+{
+    MMAL_STATUS_T err = MMAL_SUCCESS;
+
+    if (sys->is_cma)
+    {
+        if (sys->cma_out_pool == NULL &&
+            (sys->cma_out_pool = cma_buf_pool_new(CONVERTER_BUFFERS, CONVERTER_BUFFERS, true, "mmal_resizer")) == NULL)
+        {
+            msg_Err(p_filter, "Failed to alloc cma buf pool");
+            return MMAL_ENOMEM;
+        }
+    }
+    else
+    {
+        cma_buf_pool_deletez(&sys->cma_out_pool);
+    }
+
+    if (!sys->output->is_enabled &&
+        (err = mmal_port_enable(sys->output, sys->out_port_cb_fn)) != MMAL_SUCCESS)
+    {
+        msg_Err(p_filter, "Failed to enable output port %s (status=%"PRIx32" %s)",
+                sys->output->name, err, mmal_status_to_string(err));
+    }
+    return err;
+}
+
+static void conv_control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    filter_t * const p_filter = (filter_t *)port->userdata;
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, "%s: <<< cmd=%d, data=%p, pic=%p", __func__, buffer->cmd, buffer->data, buffer->user_data);
+#endif
 
     if (buffer->cmd == MMAL_EVENT_ERROR) {
-        status = *(uint32_t *)buffer->data;
-        msg_Err(dec, "MMAL error %"PRIx32" \"%s\"", status,
+        MMAL_STATUS_T status = *(uint32_t *)buffer->data;
+
+        p_filter->p_sys->err_stream = status;
+
+        msg_Err(p_filter, "MMAL error %"PRIx32" \"%s\"", status,
                 mmal_status_to_string(status));
     }
 
     mmal_buffer_header_release(buffer);
 }
 
-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+static void conv_input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
 {
-    block_t *block = (block_t *)buffer->user_data;
-    decoder_t *dec = (decoder_t *)port->userdata;
-    decoder_sys_t *sys = dec->p_sys;
-    buffer->user_data = NULL;
+#if TRACE_ALL
+    picture_context_t * ctx = buf->user_data;
+//    filter_sys_t *const sys = ((filter_t *)port->userdata)->p_sys;
+
+    msg_Dbg((filter_t *)port->userdata, "<<< %s cmd=%d, ctx=%p, buf=%p, flags=%#x, len=%d/%d, pts=%lld",
+            __func__, buf->cmd, ctx, buf, buf->flags, buf->length, buf->alloc_size, (long long)buf->pts);
+#else
+    VLC_UNUSED(port);
+#endif
+
+    mmal_buffer_header_release(buf);
+
+#if TRACE_ALL
+    msg_Dbg((filter_t *)port->userdata, ">>> %s", __func__);
+#endif
+}
+
+static void conv_out_q_pic(filter_sys_t * const sys, picture_t * const pic)
+{
+    pic->p_next = NULL;
+
+    vlc_mutex_lock(&sys->lock);
+    pic_fifo_put(&sys->ret_pics, pic);
+    vlc_mutex_unlock(&sys->lock);
 
-    mmal_buffer_header_release(buffer);
-    if (block)
-        block_Release(block);
-    atomic_fetch_sub(&sys->input_in_transit, 1);
     vlc_sem_post(&sys->sem);
 }
 
-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+static void conv_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
 {
-    decoder_t *dec = (decoder_t *)port->userdata;
-    decoder_sys_t *sys = dec->p_sys;
-    picture_t *picture;
-    MMAL_EVENT_FORMAT_CHANGED_T *fmt;
-    MMAL_ES_FORMAT_T *format;
-
-    if (buffer->cmd == 0) {
-        picture = (picture_t *)buffer->user_data;
-        if (buffer->length > 0) {
-            picture->date = buffer->pts;
-            picture->b_progressive = sys->b_progressive;
-            picture->b_top_field_first = sys->b_top_field_first;
-            decoder_QueueVideo(dec, picture);
-        } else {
-            picture_Release(picture);
-            if (sys->output_pool) {
-                buffer->user_data = NULL;
-                buffer->alloc_size = 0;
-                buffer->data = NULL;
-                mmal_buffer_header_release(buffer);
-            }
-        }
-        atomic_fetch_sub(&sys->output_in_transit, 1);
-        vlc_sem_post(&sys->sem);
-    } else if (buffer->cmd == MMAL_EVENT_FORMAT_CHANGED) {
-        fmt = mmal_event_format_changed_get(buffer);
+    filter_t * const p_filter = (filter_t *)port->userdata;
+    filter_sys_t * const sys = p_filter->p_sys;
 
-        format = mmal_format_alloc();
-        mmal_format_full_copy(format, fmt->format);
+#if TRACE_ALL
+    msg_Dbg(p_filter, "<<< %s: cmd=%d, flags=%#x, pic=%p, data=%p, len=%d/%d, pts=%lld/%lld", __func__,
+            buf->cmd, buf->flags, buf->user_data, buf->data, buf->length, buf->alloc_size,
+            (long long)buf->pts, (long long)sys->stash[(unsigned int)(buf->pts & 0xf)].pts);
+#endif
+    if (buf->cmd == 0) {
+        picture_t * const pic = (picture_t *)buf->user_data;
 
-        if (sys->opaque)
-            format->encoding = MMAL_ENCODING_OPAQUE;
+        if (pic == NULL) {
+            msg_Err(p_filter, "%s: Buffer has no attached picture", __func__);
+        }
+        else if (buf->data == NULL || buf->length == 0)
+        {
+#if TRACE_ALL
+            msg_Dbg(p_filter, "%s: Buffer has no data", __func__);
+#endif
+        }
+        else
+        {
+            buf_to_pic_copy_props(pic, buf);
+
+            // Set pic data pointers from buf aux info now it has it
+            if (sys->is_cma) {
+                if (cma_pic_set_data(pic, sys->output->format, buf) != VLC_SUCCESS)
+                    msg_Err(p_filter, "Failed to set data");
+            }
+
+//            draw_corners(pic->p[0].p_pixels, pic->p[0].i_pitch / 4, 0, 0, pic->p[0].i_visible_pitch / 4, pic->p[0].i_visible_lines);
+#if DEBUG_SQUARES
+            draw_square(pic->p[0].p_pixels, pic->p[0].i_pitch / 4,  0, 0, 32, 32, 0xffff0000);
+            draw_square(pic->p[0].p_pixels, pic->p[0].i_pitch / 4, 32, 0, 32, 32, 0xff00ff00);
+            draw_square(pic->p[0].p_pixels, pic->p[0].i_pitch / 4, 64, 0, 32, 32, 0xff0000ff);
+#endif
+
+            buf->user_data = NULL;  // Responsability for this pic no longer with buffer
+            conv_out_q_pic(sys, pic);
+        }
+    }
+
+    mmal_buffer_header_release(buf);
+}
+
+
+static void slice_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+{
+    filter_t * const p_filter = (filter_t *)port->userdata;
+    filter_sys_t * const sys = p_filter->p_sys;
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, "<<< %s: cmd=%d, flags=%#x, pic=%p, data=%p, len=%d/%d, pts=%lld", __func__,
+            buf->cmd, buf->flags, buf->user_data, buf->data, buf->length, buf->alloc_size, (long long)buf->pts);
+#endif
+
+    if (buf->cmd != 0)
+    {
+        mmal_buffer_header_release(buf);
+        return;
+    }
+
+    if (buf->data == NULL || buf->length == 0)
+    {
+#if TRACE_ALL
+        msg_Dbg(p_filter, "%s: Buffer has no data", __func__);
+#endif
+    }
+    else
+    {
+        // Got slice
+        picture_t *pic = sys->slice.pics.head;
+        const unsigned int scale_lines = sys->output->format->es->video.height;  // Expected lines of callback
+
+        if (pic == NULL) {
+            msg_Err(p_filter, "No output picture");
+            goto fail;
+        }
+
+        // Copy lines
+        // * single plane only - fix for I420
+        {
+            const unsigned int scale_n = __MIN(scale_lines - sys->slice.line, MMAL_SLICE_HEIGHT);
+            const unsigned int pic_lines = pic->p[0].i_lines;
+            const unsigned int copy_n = sys->slice.line + scale_n <= pic_lines ? scale_n :
+                sys->slice.line >= pic_lines ? 0 :
+                    pic_lines - sys->slice.line;
+
+            const unsigned int src_stride = buf->type->video.pitch[0];
+            const unsigned int dst_stride = pic->p[0].i_pitch;
+            uint8_t *dst = pic->p[0].p_pixels + sys->slice.line * dst_stride;
+            const uint8_t *src = buf->data + buf->type->video.offset[0];
+
+            if (src_stride == dst_stride) {
+                if (copy_n != 0)
+                    memcpy(dst, src, src_stride * copy_n);
+            }
+            else {
+                unsigned int i;
+                for (i = 0; i != copy_n; ++i) {
+                    memcpy(dst, src, __MIN(dst_stride, src_stride));
+                    dst += dst_stride;
+                    src += src_stride;
+                }
+            }
+            sys->slice.line += scale_n;
+        }
+
+        if ((buf->flags & MMAL_BUFFER_HEADER_FLAG_FRAME_END) != 0 || sys->slice.line >= scale_lines) {
+
+            if ((buf->flags & MMAL_BUFFER_HEADER_FLAG_FRAME_END) == 0 || sys->slice.line != scale_lines) {
+                // Stuff doesn't add up...
+                msg_Err(p_filter, "Line count (%d/%d) & EOF disagree (flags=%#x)", sys->slice.line, scale_lines, buf->flags);
+                goto fail;
+            }
+            else {
+                sys->slice.line = 0;
+
+                vlc_mutex_lock(&sys->lock);
+                pic_fifo_get(&sys->slice.pics);  // Remove head from Q
+                vlc_mutex_unlock(&sys->lock);
+
+                buf_to_pic_copy_props(pic, buf);
+                conv_out_q_pic(sys, pic);
+            }
+        }
+    }
+
+    // Put back
+    buf->user_data = NULL; // Zap here to make sure we can't reuse later
+    mmal_buffer_header_reset(buf);
+
+    if (mmal_port_send_buffer(sys->output, buf) != MMAL_SUCCESS) {
+        mmal_buffer_header_release(buf);
+    }
+    return;
+
+fail:
+    sys->err_stream = MMAL_EIO;
+    vlc_sem_post(&sys->sem);  // If we were waiting then break us out - the flush should fix sem values
+}
+
+
+static void conv_flush(filter_t * p_filter)
+{
+    filter_sys_t * const sys = p_filter->p_sys;
+    unsigned int i;
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, "<<< %s", __func__);
+#endif
+
+    if (sys->resizer_type == FILTER_RESIZER_HVS)
+    {
+        for (i = 0; i != SUBS_MAX; ++i) {
+            hw_mmal_subpic_flush(VLC_OBJECT(p_filter), sys->subs + i);
+        }
+    }
+
+    if (sys->input != NULL && sys->input->is_enabled)
+        mmal_port_disable(sys->input);
+
+    if (sys->output != NULL && sys->output->is_enabled)
+        mmal_port_disable(sys->output);
+
+//    cma_buf_pool_deletez(&sys->cma_out_pool);
+
+    // Free up anything we may have already lying around
+    // Don't need lock as the above disables should have prevented anything
+    // happening in the background
+
+    for (i = 0; i != 16; ++i) {
+        conv_frame_stash_t *const stash = sys->stash + i;
+        unsigned int sub_no;
+
+        stash->pts = MMAL_TIME_UNKNOWN;
+        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
+            if (stash->sub_bufs[sub_no] != NULL) {
+                mmal_buffer_header_release(stash->sub_bufs[sub_no]);
+                stash->sub_bufs[sub_no] = NULL;
+            }
+        }
+    }
+
+    pic_fifo_release_all(&sys->slice.pics);
+    pic_fifo_release_all(&sys->ret_pics);
+
+    // Reset sem values - easiest & most reliable way is to just kill & re-init
+    vlc_sem_destroy(&sys->sem);
+    vlc_sem_init(&sys->sem, 0);
+    sys->pic_n = 0;
+
+    // Reset error status
+    sys->err_stream = MMAL_SUCCESS;
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, ">>> %s", __func__);
+#endif
+}
+
+static void conv_stash_fixup(filter_t * const p_filter, filter_sys_t * const sys, picture_t * const p_pic)
+{
+    conv_frame_stash_t * const stash = sys->stash + (p_pic->date & 0xf);
+    unsigned int sub_no;
+    VLC_UNUSED(p_filter);
+
+    p_pic->date = stash->pts;
+    for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
+        if (stash->sub_bufs[sub_no] != NULL) {
+            // **** Do stashed blend
+            // **** Aaargh, bother... need to rescale subs too
+
+            mmal_buffer_header_release(stash->sub_bufs[sub_no]);
+            stash->sub_bufs[sub_no] = NULL;
+        }
+    }
+}
+
+// Output buffers may contain a pic ref on error or flush
+// Free it
+static MMAL_BOOL_T out_buffer_pre_release_cb(MMAL_BUFFER_HEADER_T *header, void *userdata)
+{
+    VLC_UNUSED(userdata);
+
+    picture_t * const pic = header->user_data;
+    header->user_data = NULL;
+
+    if (pic != NULL)
+        picture_Release(pic);
+
+    return MMAL_FALSE;
+}
+
+static MMAL_STATUS_T conv_set_output(filter_t * const p_filter, filter_sys_t * const sys, picture_t * const pic)
+{
+    MMAL_STATUS_T status;
+
+    sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)p_filter;
+    sys->output->format->type = MMAL_ES_TYPE_VIDEO;
+    sys->output->format->encoding = vlc_to_mmal_video_fourcc(&p_filter->fmt_out.video);
+    sys->output->format->encoding_variant = 0;
+    hw_mmal_vlc_fmt_to_mmal_fmt(sys->output->format, &p_filter->fmt_out.video);
+
+    if (pic != NULL)
+    {
+        // Override default format width/height if we have a pic we need to match
+        if ((status = pic_to_format(sys->output->format, pic)) != MMAL_SUCCESS)
+        {
+            char cbuf[5];
+            msg_Err(p_filter, "Bad format desc: %s, pic=%p, bits=%d", str_fourcc(cbuf, pic->format.i_chroma), pic, pic->format.i_bits_per_pixel);
+            return status;
+        }
+
+        MMAL_VIDEO_FORMAT_T *fmt = &sys->output->format->es->video;
+        msg_Dbg(p_filter, "%s: %dx%d [(0,0) %dx%d]", __func__, fmt->width, fmt->height, fmt->crop.width, fmt->crop.height);
+    }
+
+    if (sys->is_sliced) {
+        // Override height for slice
+        sys->output->format->es->video.height = MMAL_SLICE_HEIGHT;
+    }
+
+    mmal_log_dump_format(sys->output->format);
+
+    status = mmal_port_format_commit(sys->output);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(p_filter, "Failed to commit format for output port %s (status=%"PRIx32" %s)",
+                sys->output->name, status, mmal_status_to_string(status));
+        return status;
+    }
+
+    sys->output->buffer_num = __MAX(sys->is_sliced ? 16 : 2, sys->output->buffer_num_recommended);
+    sys->output->buffer_size = sys->output->buffer_size_recommended;
+
+    if ((status = conv_enable_out(p_filter, sys)) != MMAL_SUCCESS)
+        return status;
+
+    return MMAL_SUCCESS;
+}
+
+static picture_t *conv_filter(filter_t *p_filter, picture_t *p_pic)
+{
+    filter_sys_t * const sys = p_filter->p_sys;
+    picture_t * ret_pics;
+    MMAL_STATUS_T err;
+    const uint64_t frame_seq = ++sys->frame_seq;
+    conv_frame_stash_t * const stash = sys->stash + (frame_seq & 0xf);
+    MMAL_BUFFER_HEADER_T * out_buf = NULL;
+
+#if TRACE_ALL
+    {
+        char dbuf0[5], dbuf1[5];
+        msg_Dbg(p_filter, "<<< %s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d->%s,%dx%d [(%d,%d) %dx%d] sar:%d/%d", __func__,
+                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma), p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
+                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
+                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
+                p_filter->fmt_in.video.i_sar_num, p_filter->fmt_in.video.i_sar_den,
+                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma), p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
+                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
+                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height,
+                p_filter->fmt_out.video.i_sar_num, p_filter->fmt_out.video.i_sar_den);
+    }
+#endif
+
+    if (sys->err_stream != MMAL_SUCCESS) {
+        goto stream_fail;
+    }
+
+    // Check pic fmt corresponds to what we have set up
+    // ??? ISP may require flush (disable) but actually seems quite happy
+    //     without
+    if (hw_mmal_vlc_pic_to_mmal_fmt_update(sys->input->format, p_pic))
+    {
+        msg_Dbg(p_filter, "Reset input port format");
+        mmal_port_format_commit(sys->input);
+    }
+
+    if (p_pic->context == NULL) {
+        // Can't have stashed subpics if not one of our pics
+        if (!sys->needs_copy_in)
+            msg_Dbg(p_filter, "%s: No context", __func__);
+    }
+    else if (sys->resizer_type == FILTER_RESIZER_HVS)
+    {
+        unsigned int sub_no = 0;
+
+        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
+            int rv;
+            if ((rv = hw_mmal_subpic_update(VLC_OBJECT(p_filter),
+                                            hw_mmal_pic_sub_buf_get(p_pic, sub_no),
+                                            sys->subs + sub_no,
+                                            &p_pic->format,
+                                            &sys->output->format->es->video.crop,
+                                            frame_seq)) == 0)
+                break;
+            else if (rv < 0)
+                goto fail;
+        }
+    }
+    else
+    {
+        unsigned int sub_no = 0;
+        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
+            if ((stash->sub_bufs[sub_no] = hw_mmal_pic_sub_buf_get(p_pic, sub_no)) != NULL) {
+                mmal_buffer_header_acquire(stash->sub_bufs[sub_no]);
+            }
+        }
+    }
+
+    if (!sys->out_fmt_set) {
+        sys->out_fmt_set = true;
+
+        if (sys->is_sliced) {
+            // If zc then we will do stride conversion when we copy to arm side
+            // so no need to worry about actual pic dimensions here
+            if ((err = conv_set_output(p_filter, sys, NULL)) != MMAL_SUCCESS)
+                goto fail;
+
+            sys->out_pool = mmal_port_pool_create(sys->output, sys->output->buffer_num, sys->output->buffer_size);
+        }
+        else {
+            picture_t *pic = filter_NewPicture(p_filter);
+            err = conv_set_output(p_filter, sys, pic);
+            picture_Release(pic);
+            if (err != MMAL_SUCCESS)
+                goto fail;
+
+            sys->out_pool = mmal_pool_create(sys->output->buffer_num, 0);
+        }
+
+        if (sys->out_pool == NULL) {
+            msg_Err(p_filter, "Failed to create output pool");
+            goto fail;
+        }
+    }
+
+    // Reenable stuff if the last thing we did was flush
+    if ((err = conv_enable_out(p_filter, sys)) != MMAL_SUCCESS ||
+        (err = conv_enable_in(p_filter, sys)) != MMAL_SUCCESS)
+        goto fail;
+
+    // We attach pic to buf before stuffing the output port
+    // We could attach the pic on output for cma, but it is a lot easier to keep
+    // the code common.
+    {
+        picture_t * const out_pic = filter_NewPicture(p_filter);
+
+        if (out_pic == NULL)
+        {
+            msg_Err(p_filter, "Failed to alloc required filter output pic");
+            goto fail;
+        }
+
+        out_pic->format.i_sar_den = p_filter->fmt_out.video.i_sar_den;
+        out_pic->format.i_sar_num = p_filter->fmt_out.video.i_sar_num;
+
+        if (sys->is_sliced) {
+            vlc_mutex_lock(&sys->lock);
+            pic_fifo_put(&sys->slice.pics, out_pic);
+            vlc_mutex_unlock(&sys->lock);
+
+            // Poke any returned pic buffers into output
+            // In general this should only happen immediately after enable
+            while ((out_buf = mmal_queue_get(sys->out_pool->queue)) != NULL)
+                mmal_port_send_buffer(sys->output, out_buf);
+        }
+        else
+        {
+            // 1 in - 1 out
+            if ((out_buf = mmal_queue_wait(sys->out_pool->queue)) == NULL)
+            {
+                msg_Err(p_filter, "Failed to get output buffer");
+                picture_Release(out_pic);
+                goto fail;
+            }
+            mmal_buffer_header_reset(out_buf);
+
+            // Attach out_pic to the buffer & ensure it is freed when the buffer is released
+            // On a good send callback the pic will be extracted to avoid this
+            out_buf->user_data = out_pic;
+            mmal_buffer_header_pre_release_cb_set(out_buf, out_buffer_pre_release_cb, NULL);
+
+#if 0
+            {
+                char dbuf0[5];
+                msg_Dbg(p_filter, "out_pic %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d",
+                        str_fourcc(dbuf0, out_pic->format.i_chroma),
+                        out_pic->format.i_width, out_pic->format.i_height,
+                        out_pic->format.i_x_offset, out_pic->format.i_y_offset,
+                        out_pic->format.i_visible_width, out_pic->format.i_visible_height,
+                        out_pic->format.i_sar_num, out_pic->format.i_sar_den);
+            }
+#endif
+
+            if (sys->is_cma) {
+                int rv;
+
+                cma_buf_t * const cb = cma_buf_pool_alloc_buf(sys->cma_out_pool, sys->output->buffer_size);
+                if (cb == NULL) {
+                    char dbuf0[5];
+                    msg_Err(p_filter, "Failed to alloc CMA buf: fmt=%s, size=%d",
+                            str_fourcc(dbuf0, out_pic->format.i_chroma),
+                            sys->output->buffer_size);
+                    goto fail;
+                }
+                const unsigned int vc_h = cma_buf_vc_handle(cb);  // Cannot coerce without going via variable
+                out_buf->data = (uint8_t *)vc_h;
+                out_buf->alloc_size = sys->output->buffer_size;
+
+                if ((rv = cma_buf_pic_attach(cb, out_pic)) != VLC_SUCCESS)
+                {
+                    char dbuf0[5];
+                    msg_Err(p_filter, "Failed to attach CMA to pic: fmt=%s err=%d",
+                            str_fourcc(dbuf0, out_pic->format.i_chroma),
+                            rv);
+                    cma_buf_unref(cb);
+                    goto fail;
+                }
+            }
+            else {
+                out_buf->data = out_pic->p[0].p_pixels;
+                out_buf->alloc_size = out_pic->p[0].i_pitch * out_pic->p[0].i_lines;
+                //**** stride ????
+            }
+
+#if TRACE_ALL
+            msg_Dbg(p_filter, "Out buf send: pic=%p, data=%p, user=%p, flags=%#x, len=%d/%d, pts=%lld",
+                    p_pic, out_buf->data, out_buf->user_data, out_buf->flags,
+                    out_buf->length, out_buf->alloc_size, (long long)out_buf->pts);
+#endif
+
+            if ((err = mmal_port_send_buffer(sys->output, out_buf)) != MMAL_SUCCESS)
+            {
+                msg_Err(p_filter, "Send buffer to output failed");
+                goto fail;
+            }
+            out_buf = NULL;
+        }
+    }
+
+
+    // Stuff into input
+    // We assume the BH is already set up with values reflecting pic date etc.
+    stash->pts = p_pic->date;
+    {
+        MMAL_BUFFER_HEADER_T *const pic_buf = sys->needs_copy_in ?
+            hw_mmal_pic_buf_copied(p_pic, sys->in_pool, sys->input, sys->cma_in_pool) :
+            hw_mmal_pic_buf_replicated(p_pic, sys->in_pool);
+
+        // Whether or not we extracted the pic_buf we are done with the picture
+        picture_Release(p_pic);
+        p_pic = NULL;
+
+        if (pic_buf == NULL) {
+            msg_Err(p_filter, "Pic has no attached buffer");
+            goto fail;
+        }
+
+        pic_buf->pts = frame_seq;
+
+#if TRACE_ALL
+            msg_Dbg(p_filter, "In buf send: pic=%p, data=%p, user=%p, flags=%#x, len=%d/%d/%d, pts=%lld",
+                    p_pic, pic_buf->data, pic_buf->user_data, pic_buf->flags,
+                    pic_buf->length, pic_buf->alloc_size, sys->input->buffer_size, (long long)pic_buf->pts);
+#endif
+
+        if ((err = mmal_port_send_buffer(sys->input, pic_buf)) != MMAL_SUCCESS)
+        {
+            msg_Err(p_filter, "Send buffer to input failed");
+            mmal_buffer_header_release(pic_buf);
+            goto fail;
+        }
+    }
+
+    // We have a 1 pic latency for everything except the 1st pic which we
+    // wait for.
+    // This means we get a single static pic out
+    if (sys->pic_n++ == 1) {
+#if TRACE_ALL
+        msg_Dbg(p_filter, ">>> %s: Pic1=NULL", __func__);
+#endif
+        return NULL;
+    }
+    vlc_sem_wait(&sys->sem);
+
+    // Return a single pending buffer
+    vlc_mutex_lock(&sys->lock);
+    ret_pics = pic_fifo_get(&sys->ret_pics);
+    vlc_mutex_unlock(&sys->lock);
+
+    if (sys->err_stream != MMAL_SUCCESS)
+        goto stream_fail;
+
+    conv_stash_fixup(p_filter, sys, ret_pics);
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, ">>> %s: pic=%p", __func__, ret_pics);
+#endif
+
+    return ret_pics;
+
+stream_fail:
+    msg_Err(p_filter, "MMAL error reported by callback");
+fail:
+#if TRACE_ALL
+    msg_Err(p_filter, ">>> %s: FAIL", __func__);
+    picture_Release(ret_pics);
+#endif
+    if (out_buf != NULL)
+        mmal_buffer_header_release(out_buf);
+    if (p_pic != NULL)
+        picture_Release(p_pic);
+    conv_flush(p_filter);
+    return NULL;
+}
+
+static void CloseConverter(vlc_object_t * obj)
+{
+    filter_t * const p_filter = (filter_t *)obj;
+    filter_sys_t * const sys = p_filter->p_sys;
+    unsigned int i;
+
+#if TRACE_ALL
+    msg_Dbg(obj, "<<< %s", __func__);
+#endif
+
+    if (sys == NULL)
+        return;
+
+    // Disables input & output ports
+    conv_flush(p_filter);
+
+    cma_buf_pool_deletez(&sys->cma_in_pool);
+    cma_buf_pool_deletez(&sys->cma_out_pool);
+
+    if (sys->component && sys->component->control->is_enabled)
+        mmal_port_disable(sys->component->control);
+
+    if (sys->component && sys->component->is_enabled)
+        mmal_component_disable(sys->component);
+
+    if (sys->resizer_type == FILTER_RESIZER_HVS)
+    {
+        for (i = 0; i != SUBS_MAX; ++i) {
+            hw_mmal_subpic_close(VLC_OBJECT(p_filter), sys->subs + i);
+        }
+    }
+
+    if (sys->out_pool)
+    {
+        if (sys->is_sliced)
+            mmal_port_pool_destroy(sys->output, sys->out_pool);
+        else
+            mmal_pool_destroy(sys->out_pool);
+    }
+
+    if (sys->in_pool != NULL)
+        mmal_pool_destroy(sys->in_pool);
+
+    if (sys->component)
+        mmal_component_release(sys->component);
+
+    cma_vcsm_exit(sys->vcsm_init_type);
+
+    vlc_sem_destroy(&sys->sem);
+    vlc_mutex_destroy(&sys->lock);
+
+    p_filter->p_sys = NULL;
+    free(sys);
+}
+
+
+static inline MMAL_FOURCC_T filter_enc_in(const video_format_t * const fmt)
+{
+    if (hw_mmal_chroma_is_mmal(fmt->i_chroma))
+        return vlc_to_mmal_video_fourcc(fmt);
+
+    if (fmt->i_chroma == VLC_CODEC_I420 ||
+        fmt->i_chroma == VLC_CODEC_I420_10L)
+        return MMAL_ENCODING_I420;
+
+    return 0;
+}
 
-        sys->output_format = format;
+static inline MMAL_FOURCC_T filter_enc_out(const video_format_t * const fmt)
+{
+    const MMAL_FOURCC_T mmes = vlc_to_mmal_video_fourcc(fmt);
+    // Can only copy out single plane stuff currently - this could be fixed!
+    return hw_mmal_chroma_is_mmal(fmt->i_chroma) || mmes != MMAL_ENCODING_I420 ? mmes : 0;
+}
+
+
+static int OpenConverter(vlc_object_t * obj)
+{
+    filter_t * const p_filter = (filter_t *)obj;
+    int ret = VLC_EGENERIC;
+    filter_sys_t *sys;
+    MMAL_STATUS_T status;
+    MMAL_FOURCC_T enc_out = filter_enc_out(&p_filter->fmt_out.video);
+    const MMAL_FOURCC_T enc_in = filter_enc_in(&p_filter->fmt_in.video);
+    bool use_resizer;
+    bool use_isp;
+    int gpu_mem;
+
+    // At least in principle we should deal with any mmal format as input
+    if (enc_in == 0 || enc_out == 0)
+        return VLC_EGENERIC;
+
+    use_resizer = var_InheritBool(p_filter, MMAL_RESIZE_NAME);
+    use_isp = var_InheritBool(p_filter, MMAL_ISP_NAME);
+
+retry:
+    // ** Make more generic by checking supported encs
+    //
+    // Must use ISP - HVS can't do this, nor can resizer
+    if (enc_in == MMAL_ENCODING_YUVUV64_10) {
+        // If resizer selected then just give up
+        if (use_resizer)
+            return VLC_EGENERIC;
+        // otherwise downgrade HVS to ISP
+        use_isp = true;
+    }
+    // HVS can't do I420
+    if (enc_out == MMAL_ENCODING_I420) {
+        use_isp = true;
+    }
+    // Only HVS can deal with SAND30
+    if (enc_in == MMAL_ENCODING_YUV10_COL) {
+        if (use_isp || use_resizer)
+            return VLC_EGENERIC;
+    }
+
+
+    if (use_resizer) {
+        // use resizer overrides use_isp
+        use_isp = false;
+    }
+
+    // Check we have a sliced version of the fourcc if we want the resizer
+    if (use_resizer &&
+        (enc_out = pic_to_slice_mmal_fourcc(enc_out)) == 0) {
+        return VLC_EGENERIC;
+    }
+
+    gpu_mem = hw_mmal_get_gpu_mem();
+
+    {
+        char dbuf0[5], dbuf1[5], dbuf2[5], dbuf3[5];
+        msg_Dbg(p_filter, "%s: (%s) %s/%s,%dx%d [(%d,%d) %d/%d] sar:%d/%d->%s/%s,%dx%d [(%d,%d) %dx%d] rgb:%#x:%#x:%#x sar:%d/%d (gpu=%d)", __func__,
+                use_resizer ? "resize" : use_isp ? "isp" : "hvs",
+                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma), str_fourcc(dbuf2, enc_in),
+                p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
+                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
+                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
+                p_filter->fmt_in.video.i_sar_num, p_filter->fmt_in.video.i_sar_den,
+                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma), str_fourcc(dbuf3, enc_out),
+                p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
+                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
+                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height,
+                p_filter->fmt_out.video.i_rmask, p_filter->fmt_out.video.i_gmask, p_filter->fmt_out.video.i_bmask,
+                p_filter->fmt_out.video.i_sar_num, p_filter->fmt_out.video.i_sar_den,
+                gpu_mem);
+    }
+
+    sys = calloc(1, sizeof(filter_sys_t));
+    if (!sys) {
+        ret = VLC_ENOMEM;
+        goto fail;
+    }
+    p_filter->p_sys = sys;
 
-        mmal_buffer_header_release(buffer);
+    // Init stuff the we destroy unconditionaly in Close first
+    vlc_mutex_init(&sys->lock);
+    vlc_sem_init(&sys->sem, 0);
+    sys->err_stream = MMAL_SUCCESS;
+    pic_fifo_init(&sys->ret_pics);
+    pic_fifo_init(&sys->slice.pics);
+
+    sys->needs_copy_in = !hw_mmal_chroma_is_mmal(p_filter->fmt_in.video.i_chroma);
+    sys->in_port_cb_fn = conv_input_port_cb;
+
+    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
+        msg_Err(p_filter, "VCSM init failed");
+        goto fail;
+    }
+
+    if (use_resizer) {
+        sys->resizer_type = FILTER_RESIZER_RESIZER;
+        sys->is_sliced = true;
+        sys->component_name = MMAL_COMPONENT_DEFAULT_RESIZER;
+        sys->out_port_cb_fn = slice_output_port_cb;
+    }
+    else if (use_isp) {
+        sys->resizer_type = FILTER_RESIZER_ISP;
+        sys->is_sliced = false;  // Copy directly into filter picture
+        sys->component_name = MMAL_COMPONENT_ISP_RESIZER;
+        sys->out_port_cb_fn = conv_output_port_cb;
     } else {
-        mmal_buffer_header_release(buffer);
+        sys->resizer_type = FILTER_RESIZER_HVS;
+        sys->is_sliced = false;  // Copy directly into filter picture
+        sys->component_name = MMAL_COMPONENT_HVS;
+        sys->out_port_cb_fn = conv_output_port_cb;
+    }
+    sys->is_cma = is_cma_buf_pic_chroma(p_filter->fmt_out.video.i_chroma);
+
+    status = mmal_component_create(sys->component_name, &sys->component);
+    if (status != MMAL_SUCCESS) {
+        if (!use_isp && !use_resizer) {
+            msg_Warn(p_filter, "Failed to rcreate HVS resizer - retrying with ISP");
+            CloseConverter(obj);
+            use_isp = true;
+            goto retry;
+        }
+        msg_Err(p_filter, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
+                MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, status, mmal_status_to_string(status));
+        goto fail;
+    }
+    sys->output = sys->component->output[0];
+    sys->input  = sys->component->input[0];
+
+    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)p_filter;
+    status = mmal_port_enable(sys->component->control, conv_control_port_cb);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(p_filter, "Failed to enable control port %s (status=%"PRIx32" %s)",
+                sys->component->control->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    if (sys->needs_copy_in &&
+        (sys->cma_in_pool = cma_buf_pool_new(2, 2, true, "conv-copy-in")) == NULL)
+    {
+        msg_Err(p_filter, "Failed to allocate input CMA pool");
+        goto fail;
+    }
+
+    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)p_filter;
+    sys->input->format->type = MMAL_ES_TYPE_VIDEO;
+    sys->input->format->encoding = enc_in;
+    sys->input->format->encoding_variant = MMAL_ENCODING_I420;
+    hw_mmal_vlc_fmt_to_mmal_fmt(sys->input->format, &p_filter->fmt_in.video);
+    port_parameter_set_bool(sys->input, MMAL_PARAMETER_ZERO_COPY, 1);
+
+    mmal_log_dump_format(sys->input->format);
+
+    status = mmal_port_format_commit(sys->input);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(p_filter, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
+                sys->input->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+    sys->input->buffer_size = sys->input->buffer_size_recommended;
+    sys->input->buffer_num = NUM_DECODER_BUFFER_HEADERS;
+
+    if ((status = conv_enable_in(p_filter, sys)) != MMAL_SUCCESS)
+        goto fail;
+
+    port_parameter_set_bool(sys->output, MMAL_PARAMETER_ZERO_COPY, sys->is_sliced || sys->is_cma);
+
+    status = mmal_component_enable(sys->component);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(p_filter, "Failed to enable component %s (status=%"PRIx32" %s)",
+                sys->component->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    if ((sys->in_pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)
+    {
+        msg_Err(p_filter, "Failed to create input pool");
+        goto fail;
+    }
+
+    if (sys->resizer_type == FILTER_RESIZER_HVS)
+    {
+        unsigned int i;
+        for (i = 0; i != SUBS_MAX; ++i) {
+            if (hw_mmal_subpic_open(VLC_OBJECT(p_filter), sys->subs + i, sys->component->input[i + 1], -1, i + 1) != MMAL_SUCCESS)
+            {
+                msg_Err(p_filter, "Failed to open subpic %d", i);
+                goto fail;
+            }
+        }
+    }
+
+    p_filter->pf_video_filter = conv_filter;
+    p_filter->pf_flush = conv_flush;
+    // video_drain NIF in filter structure
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, ">>> %s: ok", __func__);
+#endif
+
+    return VLC_SUCCESS;
+
+fail:
+    CloseConverter(obj);
+
+    if (!use_resizer && status == MMAL_ENOMEM) {
+        use_resizer = true;
+        msg_Warn(p_filter, "Lack of memory to use HVS/ISP: trying resizer");
+        goto retry;
     }
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, ">>> %s: FAIL: %d", __func__, ret);
+#endif
+    return ret;
 }
+
+#if OPT_TO_FROM_ZC
+//----------------------------------------------------------------------------
+//
+// Simple copy in to ZC
+
+typedef struct to_zc_sys_s {
+    vcsm_init_type_t vcsm_init_type;
+    cma_buf_pool_t * cma_out_pool;
+} to_zc_sys_t;
+
+
+static size_t buf_alloc_size(const vlc_fourcc_t i_chroma, const unsigned int width, const unsigned int height)
+{
+    const unsigned int pels = width * height;
+
+    switch (i_chroma)
+    {
+        case VLC_CODEC_MMAL_ZC_RGB32:
+            return pels * 4;
+        case VLC_CODEC_MMAL_ZC_I420:
+            return pels * 3 / 2;
+        default:
+            break;
+    }
+    return 0;
+}
+
+
+static picture_t *
+to_zc_filter(filter_t *p_filter, picture_t *in_pic)
+{
+    to_zc_sys_t * const sys = (to_zc_sys_t *)p_filter->p_sys;
+#if TRACE_ALL
+    msg_Dbg(p_filter, "<<< %s", __func__);
+#endif
+
+    assert(p_filter->fmt_out.video.i_chroma == VLC_CODEC_MMAL_ZC_I420);
+
+    picture_t * const out_pic = filter_NewPicture(p_filter);
+    if (out_pic == NULL)
+        goto fail0;
+
+    MMAL_ES_SPECIFIC_FORMAT_T mm_vfmt = {.video={0}};
+    MMAL_ES_FORMAT_T mm_esfmt = {
+        .encoding = vlc_to_mmal_video_fourcc(&p_filter->fmt_out.video),
+        .es = &mm_vfmt};
+
+    hw_mmal_vlc_fmt_to_mmal_fmt(&mm_esfmt, &p_filter->fmt_out.video);
+
+    const size_t buf_alloc = buf_alloc_size(p_filter->fmt_out.video.i_chroma,
+                                            mm_vfmt.video.width, mm_vfmt.video.height);
+    if (buf_alloc == 0)
+        goto fail1;
+    cma_buf_t *const cb = cma_buf_pool_alloc_buf(sys->cma_out_pool, buf_alloc);
+    if (cb == NULL)
+        goto fail1;
+
+    if (cma_buf_pic_attach(cb, out_pic) != VLC_SUCCESS)
+        goto fail2;
+    cma_pic_set_data(out_pic, &mm_esfmt, NULL);
+
+    hw_mmal_copy_pic_to_buf(cma_buf_addr(cb), NULL, &mm_esfmt, in_pic);
+
+    // Copy pic properties
+    out_pic->date              = in_pic->date;
+    out_pic->b_force           = in_pic->b_force;
+    out_pic->b_progressive     = in_pic->b_progressive;
+    out_pic->b_top_field_first = in_pic->b_top_field_first;
+    out_pic->i_nb_fields       = in_pic->i_nb_fields;
+
+    picture_Release(in_pic);
+
+    return out_pic;
+
+fail2:
+    cma_buf_unref(cb);
+fail1:
+    picture_Release(out_pic);
+fail0:
+    picture_Release(in_pic);
+    return NULL;
+}
+
+static void to_zc_flush(filter_t * p_filter)
+{
+    VLC_UNUSED(p_filter);
+}
+
+static void CloseConverterToZc(vlc_object_t * obj)
+{
+    filter_t * const p_filter = (filter_t *)obj;
+    to_zc_sys_t * const sys = (to_zc_sys_t *)p_filter->p_sys;
+
+    if (sys == NULL)
+        return;
+
+    p_filter->p_sys = NULL;
+
+    cma_buf_pool_deletez(&sys->cma_out_pool);
+    cma_vcsm_exit(sys->vcsm_init_type);
+
+    free(sys);
+}
+
+static bool to_zc_validate_fmt(const video_format_t * const f_in, const video_format_t * const f_out)
+{
+    if (!((f_in->i_chroma == VLC_CODEC_I420 || f_in->i_chroma == VLC_CODEC_I420_10L) &&
+          f_out->i_chroma == VLC_CODEC_MMAL_ZC_I420))
+    {
+        return false;
+    }
+    if (f_in->i_height != f_out->i_height ||
+        f_in->i_width  != f_out->i_width)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+static int OpenConverterToZc(vlc_object_t * obj)
+{
+    int ret = VLC_EGENERIC;
+    filter_t * const p_filter = (filter_t *)obj;
+
+    if (!to_zc_validate_fmt(&p_filter->fmt_in.video, &p_filter->fmt_out.video))
+        goto fail;
+
+    {
+        char dbuf0[5], dbuf1[5];
+        msg_Dbg(p_filter, "%s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d->%s,%dx%d [(%d,%d) %dx%d] rgb:%#x:%#x:%#x sar:%d/%d", __func__,
+                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma),
+                p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
+                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
+                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
+                p_filter->fmt_in.video.i_sar_num, p_filter->fmt_in.video.i_sar_den,
+                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma),
+                p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
+                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
+                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height,
+                p_filter->fmt_out.video.i_rmask, p_filter->fmt_out.video.i_gmask, p_filter->fmt_out.video.i_bmask,
+                p_filter->fmt_out.video.i_sar_num, p_filter->fmt_out.video.i_sar_den);
+    }
+
+    to_zc_sys_t * const sys = calloc(1, sizeof(*sys));
+    if (!sys) {
+        ret = VLC_ENOMEM;
+        goto fail;
+    }
+    p_filter->p_sys = (filter_sys_t *)sys;
+
+    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
+        msg_Err(p_filter, "VCSM init failed");
+        goto fail;
+    }
+
+    if ((sys->cma_out_pool = cma_buf_pool_new(5, 5, true, "conv-to-zc")) == NULL)
+    {
+        msg_Err(p_filter, "Failed to allocate input CMA pool");
+        goto fail;
+    }
+
+    p_filter->pf_video_filter = to_zc_filter;
+    p_filter->pf_flush = to_zc_flush;
+    return VLC_SUCCESS;
+
+fail:
+    CloseConverterToZc(obj);
+    return ret;
+}
+
+//----------------------------------------------------------------------------
+//
+// Simple "copy" from ZC
+
+static void CloseConverterFromZc(vlc_object_t * obj)
+{
+    VLC_UNUSED(obj);
+}
+
+static int OpenConverterFromZc(vlc_object_t * obj)
+{
+    return VLC_EGENERIC;
+}
+#endif
+//----------------------------------------------------------------------------
+
+typedef struct blend_sys_s {
+    vzc_pool_ctl_t * vzc;
+    const picture_t * last_dst;  // Not a ref, just a hint that we have a new pic
+    vcsm_init_type_t vcsm_init_type;
+} blend_sys_t;
+
+static void FilterBlendMmal(filter_t *p_filter,
+                  picture_t *dst, const picture_t * src,
+                  int x_offset, int y_offset, int alpha)
+{
+    blend_sys_t * const sys = (blend_sys_t *)p_filter->p_sys;
+#if TRACE_ALL
+    msg_Dbg(p_filter, "%s (%d,%d:%d) pic=%p, pts=%lld, force=%d", __func__, x_offset, y_offset, alpha, src, src->date, src->b_force);
+#endif
+    // If nothing to do then do nothing
+    if (alpha == 0 ||
+        src->format.i_visible_height == 0 ||
+        src->format.i_visible_width == 0)
+    {
+        return;
+    }
+
+    if (dst->context == NULL)
+        msg_Err(p_filter, "MMAL pic missing context");
+    else
+    {
+        // cast away src const so we can ref it
+        MMAL_BUFFER_HEADER_T *buf = hw_mmal_vzc_buf_from_pic(sys->vzc, (picture_t *)src,
+                                                             vis_mmal_rect(&dst->format),
+                                                             x_offset, y_offset,
+                                                             alpha,
+                                                             dst != sys->last_dst || !hw_mmal_pic_has_sub_bufs(dst));
+        if (buf == NULL) {
+            msg_Err(p_filter, "Failed to allocate vzc buffer for subpic");
+            return;
+        }
+
+        hw_mmal_pic_sub_buf_add(dst, buf);
+
+        sys->last_dst = dst;
+    }
+}
+
+static void FlushBlendMmal(filter_t * p_filter)
+{
+    blend_sys_t * const sys = (blend_sys_t *)p_filter->p_sys;
+    sys->last_dst = NULL;
+    hw_mmal_vzc_pool_flush(sys->vzc);
+}
+
+static void CloseBlendMmal(vlc_object_t *object)
+{
+    filter_t * const p_filter = (filter_t *)object;
+    blend_sys_t * const sys = (blend_sys_t *)p_filter->p_sys;
+
+    if (sys != NULL) {
+        p_filter->p_sys = NULL;
+
+        hw_mmal_vzc_pool_release(sys->vzc);
+        cma_vcsm_exit(sys->vcsm_init_type);
+        free(sys);
+    }
+}
+
+static int OpenBlendMmal(vlc_object_t *object)
+{
+    filter_t * const p_filter = (filter_t *)object;
+    const vlc_fourcc_t vfcc_dst = p_filter->fmt_out.video.i_chroma;
+
+    if (!hw_mmal_chroma_is_mmal(vfcc_dst) ||
+        !hw_mmal_vzc_subpic_fmt_valid(&p_filter->fmt_in.video))
+    {
+        return VLC_EGENERIC;
+    }
+
+    {
+        char dbuf0[5], dbuf1[5];
+        msg_Dbg(p_filter, "%s: (%s) %s,%dx%d [(%d,%d) %dx%d]->%s,%dx%d [(%d,%d) %dx%d]", __func__,
+                "blend",
+                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma), p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
+                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
+                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
+                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma), p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
+                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
+                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height);
+    }
+
+    {
+        blend_sys_t * const sys = calloc(1, sizeof (*sys));
+        if (sys == NULL)
+            return VLC_ENOMEM;
+
+        p_filter->p_sys = (filter_sys_t *)sys;
+
+        if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
+            msg_Err(p_filter, "VCSM init failed");
+            goto fail;
+        }
+
+        if ((sys->vzc = hw_mmal_vzc_pool_new()) == NULL)
+            goto fail;
+    }
+
+    p_filter->pf_video_blend = FilterBlendMmal;
+    p_filter->pf_flush = FlushBlendMmal;
+
+    return VLC_SUCCESS;
+
+fail:
+    CloseBlendMmal(VLC_OBJECT(p_filter));
+    return VLC_ENOMEM;
+}
+
+// ---------------------------------------------------------------------------
+
+static void FilterBlendNeon(filter_t *p_filter,
+                  picture_t *dst_pic, const picture_t * src_pic,
+                  int x_offset, int y_offset, int alpha)
+{
+    const uint8_t * s_data;
+    uint8_t * d_data;
+    int width = src_pic->format.i_visible_width;
+    int height = src_pic->format.i_visible_height;
+    blend_neon_fn *const blend_fn = (blend_neon_fn * )p_filter->p_sys;
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, "%s (%d,%d:%d) pic=%p, pts=%lld, force=%d", __func__, x_offset, y_offset, alpha, src_pic, src_pic->date, src_pic->b_force);
+#endif
+
+    if (alpha == 0 ||
+        src_pic->format.i_visible_height == 0 ||
+        src_pic->format.i_visible_width == 0)
+    {
+        return;
+    }
+
+    x_offset += dst_pic->format.i_x_offset;
+    y_offset += dst_pic->format.i_y_offset;
+
+    // Deal with R/B overrun
+    if (x_offset + width >= (int)(dst_pic->format.i_x_offset + dst_pic->format.i_visible_width))
+        width = dst_pic->format.i_x_offset + dst_pic->format.i_visible_width - x_offset;
+    if (y_offset + height >= (int)(dst_pic->format.i_y_offset + dst_pic->format.i_visible_height))
+        height = dst_pic->format.i_y_offset + dst_pic->format.i_visible_height - y_offset;
+
+    if (width <= 0 || height <= 0) {
+        return;
+    }
+
+    // *** L/U overrun
+
+    s_data = src_pic->p[0].p_pixels +
+        src_pic->p[0].i_pixel_pitch * src_pic->format.i_x_offset +
+        src_pic->p[0].i_pitch * src_pic->format.i_y_offset;
+    d_data = dst_pic->p[0].p_pixels +
+        dst_pic->p[0].i_pixel_pitch * x_offset +
+        dst_pic->p[0].i_pitch * y_offset;
+
+
+    do {
+        blend_fn(d_data, s_data, alpha, width);
+        s_data += src_pic->p[0].i_pitch;
+        d_data += dst_pic->p[0].i_pitch;
+    } while (--height > 0);
+}
+
+static void CloseBlendNeon(vlc_object_t *object)
+{
+    VLC_UNUSED(object);
+}
+
+static int OpenBlendNeon(vlc_object_t *object)
+{
+    filter_t * const p_filter = (filter_t *)object;
+    const vlc_fourcc_t vfcc_dst = p_filter->fmt_out.video.i_chroma;
+    MMAL_FOURCC_T mfcc_src = vlc_to_mmal_video_fourcc(&p_filter->fmt_in.video);
+    MMAL_FOURCC_T mfcc_dst = vlc_to_mmal_video_fourcc(&p_filter->fmt_out.video);
+    blend_neon_fn * blend_fn = (blend_neon_fn *)0;
+
+    // Non-alpha RGB only for dest
+    if (vfcc_dst != VLC_CODEC_RGB32)
+        return VLC_EGENERIC;
+
+    // Check we have appropriate blend fn (mmal doesn't have a non-alpha RGB32)
+    switch (mfcc_src) {
+    case MMAL_ENCODING_RGBA:
+        if (mfcc_dst == MMAL_ENCODING_RGBA)
+            blend_fn = blend_rgbx_rgba_neon;
+        else if (mfcc_dst == MMAL_ENCODING_BGRA)
+            blend_fn = blend_bgrx_rgba_neon;
+        break;
+
+    case MMAL_ENCODING_BGRA:
+        if (mfcc_dst == MMAL_ENCODING_BGRA)
+            blend_fn = blend_rgbx_rgba_neon;
+        else if (mfcc_dst == MMAL_ENCODING_RGBA)
+            blend_fn = blend_bgrx_rgba_neon;
+        break;
+
+    default:
+        break;
+    }
+
+    if (blend_fn == (blend_neon_fn *)0)
+    {
+        return VLC_EGENERIC;
+    }
+
+    p_filter->p_sys = (void *)blend_fn;
+    p_filter->pf_video_blend = FilterBlendNeon;
+
+    {
+        char dbuf0[5], dbuf1[5];
+        char dbuf0a[5], dbuf1a[5];
+        msg_Dbg(p_filter, "%s: (%s) %s/%s,%dx%d [(%d,%d) %dx%d]->%s/%s,%dx%d [(%d,%d) %dx%d]", __func__,
+                "blend",
+                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma),
+                str_fourcc(dbuf0a, mfcc_src),
+                p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
+                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
+                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
+                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma),
+                str_fourcc(dbuf1a, mfcc_dst),
+                p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
+                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
+                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height);
+    }
+
+    return VLC_SUCCESS;
+}
+
+vlc_module_begin()
+    set_category( CAT_INPUT )
+    set_subcategory( SUBCAT_INPUT_VCODEC )
+    set_shortname(N_("MMAL decoder"))
+    set_description(N_("MMAL-based decoder plugin for Raspberry Pi"))
+    set_capability("video decoder", 90)
+    add_shortcut("mmal_decoder")
+    add_bool(MMAL_OPAQUE_NAME, true, MMAL_OPAQUE_TEXT, MMAL_OPAQUE_LONGTEXT, false)
+    set_callbacks(OpenDecoder, CloseDecoder)
+
+    add_submodule()
+    set_category( CAT_VIDEO )
+    set_subcategory( SUBCAT_VIDEO_VFILTER )
+    set_shortname(N_("MMAL resizer"))
+    set_description(N_("MMAL resizing conversion filter"))
+    add_shortcut("mmal_converter")
+    set_capability( "video converter", 900 )
+    add_bool(MMAL_RESIZE_NAME, /* default */ false, MMAL_RESIZE_TEXT, MMAL_RESIZE_LONGTEXT, /* advanced option */ false)
+    add_bool(MMAL_ISP_NAME, /* default */ false, MMAL_ISP_TEXT, MMAL_ISP_LONGTEXT, /* advanced option */ false)
+    set_callbacks(OpenConverter, CloseConverter)
+
+#if OPT_TO_FROM_ZC
+    add_submodule()
+    set_category( CAT_VIDEO )
+    set_subcategory( SUBCAT_VIDEO_VFILTER )
+    set_shortname(N_("MMAL to ZC"))
+    set_description(N_("MMAL conversion to ZC filter"))
+    add_shortcut("mmal_to_zc")
+    set_capability( "video converter", 901 )
+    set_callbacks(OpenConverterToZc, CloseConverterToZc)
+
+    add_submodule()
+    set_category( CAT_VIDEO )
+    set_subcategory( SUBCAT_VIDEO_VFILTER )
+    set_shortname(N_("MMAL from ZC"))
+    set_description(N_("MMAL conversion from ZC filter"))
+    add_shortcut("mmal_from_zc")
+    set_capability( "video converter", 902 )
+    set_callbacks(OpenConverterFromZc, CloseConverterFromZc)
+#endif
+
+    add_submodule()
+    set_category( CAT_VIDEO )
+    set_subcategory( SUBCAT_VIDEO_VFILTER )
+    set_description(N_("Video pictures blending for MMAL"))
+    add_shortcut("mmal_blend")
+    set_capability("video blending", 120)
+    set_callbacks(OpenBlendMmal, CloseBlendMmal)
+
+    add_submodule()
+    set_category( CAT_VIDEO )
+    set_subcategory( SUBCAT_VIDEO_VFILTER )
+    set_description(N_("Video pictures blending for neon"))
+    add_shortcut("neon_blend")
+    set_capability("video blending", 110)
+    set_callbacks(OpenBlendNeon, CloseBlendNeon)
+
+vlc_module_end()
+
+
--- /dev/null
+++ b/modules/hw/mmal/converter_mmal.c
@@ -0,0 +1,479 @@
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <interface/vcsm/user-vcsm.h>
+
+#include <vlc_common.h>
+#include <vlc_picture.h>
+
+#include <libdrm/drm_fourcc.h>
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+#include <GLES2/gl2.h>
+#include <GLES2/gl2ext.h>
+
+#include "mmal_cma.h"
+
+#include "../../video_output/opengl/converter.h"
+
+#include "mmal_picture.h"
+
+#include <assert.h>
+
+#define TRACE_ALL 0
+
+typedef struct mmal_gl_converter_s
+{
+    EGLint drm_fourcc;
+    vcsm_init_type_t vcsm_init_type;
+    cma_buf_t * last_cb;
+
+    PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glEGLImageTargetTexture2DOES;
+} mmal_gl_converter_t;
+
+
+static EGLint vlc_to_gl_fourcc(const video_format_t * const fmt)
+{
+    // Converting to mmal selects the right RGB32 varient
+    switch(vlc_to_mmal_video_fourcc(fmt))
+    {
+       case MMAL_ENCODING_I420:
+          return MMAL_FOURCC('Y','U','1','2');
+       case MMAL_ENCODING_YV12:
+          return MMAL_FOURCC('Y','V','1','2');
+       case MMAL_ENCODING_I422:
+          return MMAL_FOURCC('Y','U','1','6');
+//       case MMAL_ENCODING_YUVUV128:  // Doesn't actually work yet
+       case MMAL_ENCODING_NV12:
+          return MMAL_FOURCC('N','V','1','2');
+       case MMAL_ENCODING_NV21:
+          return MMAL_FOURCC('N','V','2','1');
+       case MMAL_ENCODING_RGB16:
+          return MMAL_FOURCC('R','G','1','6');
+       case MMAL_ENCODING_RGB24:
+          return MMAL_FOURCC('B','G','2','4');
+       case MMAL_ENCODING_BGR24:
+          return MMAL_FOURCC('R','G','2','4');
+       case MMAL_ENCODING_BGR32:
+       case MMAL_ENCODING_BGRA:
+          return MMAL_FOURCC('X','R','2','4');
+       case MMAL_ENCODING_RGB32:
+       case MMAL_ENCODING_RGBA:
+          return MMAL_FOURCC('X','B','2','4');
+       default:
+          break;
+    }
+    return 0;
+}
+
+typedef struct tex_context_s {
+    picture_context_t cmn;
+    GLuint texture;
+
+    PFNGLDELETETEXTURESPROC DeleteTextures;  // Copy fn pointer so we don't need tc on delete
+} tex_context_t;
+
+static void tex_context_delete(tex_context_t * const tex)
+{
+    tex->DeleteTextures(1, &tex->texture);
+    free(tex);
+}
+
+static void tex_context_destroy(picture_context_t * pic_ctx)
+{
+    tex_context_delete((tex_context_t *)pic_ctx);
+}
+
+static picture_context_t * tex_context_copy(picture_context_t * pic_ctx)
+{
+    return pic_ctx;
+}
+
+static tex_context_t * get_tex_context(const opengl_tex_converter_t * const tc, picture_t * const pic, cma_buf_t * const cb)
+{
+    mmal_gl_converter_t * const sys = tc->priv;
+    tex_context_t * tex = (tex_context_t *)cma_buf_context2(cb);
+    if (tex != NULL)
+        return tex;
+
+    if ((tex = malloc(sizeof(*tex))) == NULL)
+        return NULL;
+
+    *tex = (tex_context_t){
+        .cmn = {
+            .destroy = tex_context_destroy,
+            .copy = tex_context_copy
+        },
+        .texture = 0,
+        .DeleteTextures = tc->vt->DeleteTextures
+    };
+
+    {
+        EGLint attribs[30];
+        EGLint * a = attribs;
+        const int fd = cma_buf_fd(cb);
+        uint8_t * base_addr = cma_buf_addr(cb);
+
+        if (pic->i_planes >= 4 || pic->i_planes <= 0)
+        {
+            msg_Err(tc, "%s: Bad planes: %d", __func__, pic->i_planes);
+            goto fail;
+        }
+
+        *a++ = EGL_WIDTH;
+        *a++ = pic->format.i_visible_width;
+        *a++ = EGL_HEIGHT;
+        *a++ = pic->format.i_visible_height;
+        *a++ = EGL_LINUX_DRM_FOURCC_EXT;
+        *a++ = sys->drm_fourcc;
+
+        if (pic->format.i_chroma == VLC_CODEC_MMAL_ZC_SAND8)
+        {
+            // Sand is its own very special bunny :-(
+            static const EGLint attnames[] = {
+                EGL_DMA_BUF_PLANE0_FD_EXT,
+                EGL_DMA_BUF_PLANE0_OFFSET_EXT,
+                EGL_DMA_BUF_PLANE0_PITCH_EXT,
+                EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
+                EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
+                EGL_DMA_BUF_PLANE1_FD_EXT,
+                EGL_DMA_BUF_PLANE1_OFFSET_EXT,
+                EGL_DMA_BUF_PLANE1_PITCH_EXT,
+                EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
+                EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT
+            };
+
+            const EGLint * n = attnames;
+
+            for (int i = 0; i < pic->i_planes; ++i)
+            {
+                const uint64_t mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(pic->p[i].i_pitch >> 7);
+
+                *a++ = *n++;
+                *a++ = fd;
+                *a++ = *n++;
+                *a++ = pic->p[i].p_pixels - base_addr;
+                *a++ = *n++;
+                *a++ = pic->format.i_width;
+                *a++ = *n++;
+                *a++ = (EGLint)(mod >> 32);
+                *a++ = *n++;
+                *a++ = (EGLint)(mod & 0xffffffff);
+            }
+        }
+        else
+        {
+            static const EGLint attnames[] = {
+                EGL_DMA_BUF_PLANE0_FD_EXT,
+                EGL_DMA_BUF_PLANE0_OFFSET_EXT,
+                EGL_DMA_BUF_PLANE0_PITCH_EXT,
+                EGL_DMA_BUF_PLANE1_FD_EXT,
+                EGL_DMA_BUF_PLANE1_OFFSET_EXT,
+                EGL_DMA_BUF_PLANE1_PITCH_EXT,
+                EGL_DMA_BUF_PLANE2_FD_EXT,
+                EGL_DMA_BUF_PLANE2_OFFSET_EXT,
+                EGL_DMA_BUF_PLANE2_PITCH_EXT,
+                EGL_DMA_BUF_PLANE3_FD_EXT,
+                EGL_DMA_BUF_PLANE3_OFFSET_EXT,
+                EGL_DMA_BUF_PLANE3_PITCH_EXT
+            };
+
+            const EGLint * n = attnames;
+
+            for (int i = 0; i < pic->i_planes; ++i)
+            {
+                *a++ = *n++;
+                *a++ = fd;
+                *a++ = *n++;
+                *a++ = pic->p[i].p_pixels - base_addr;
+                *a++ = *n++;
+                *a++ = pic->p[i].i_pitch;
+            }
+        }
+
+        *a = EGL_NONE;
+
+        const EGLImage image = tc->gl->egl.createImageKHR(tc->gl, EGL_LINUX_DMA_BUF_EXT, NULL, attribs);
+        if (!image) {
+           msg_Err(tc, "Failed to import fd %d: Err=%#x", fd, tc->vt->GetError());
+           goto fail;
+        }
+
+        // ** ?? tc->tex_target
+        tc->vt->GenTextures(1, &tex->texture);
+        tc->vt->BindTexture(GL_TEXTURE_EXTERNAL_OES, tex->texture);
+        tc->vt->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        tc->vt->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        sys->glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
+
+        tc->gl->egl.destroyImageKHR(tc->gl, image);
+    }
+
+    if (cma_buf_add_context2(cb, &tex->cmn) != VLC_SUCCESS)
+    {
+        msg_Err(tc, "%s: add_context2 failed", __func__);
+        goto fail;
+    }
+    return tex;
+
+fail:
+    tex_context_delete(tex);
+    return NULL;
+}
+
+
+static int
+tc_mmal_update(const opengl_tex_converter_t *tc, GLuint *textures,
+                const GLsizei *tex_width, const GLsizei *tex_height,
+                picture_t *pic, const size_t *plane_offset)
+{
+    mmal_gl_converter_t * const sys = tc->priv;
+#if TRACE_ALL
+    {
+        char cbuf[5];
+        msg_Dbg(tc, "%s: %s %d*%dx%d : %d*%dx%d", __func__,
+                str_fourcc(cbuf, pic->format.i_chroma),
+                tc->tex_count, tex_width[0], tex_height[0], pic->i_planes, pic->p[0].i_pitch, pic->p[0].i_lines);
+    }
+#endif
+    VLC_UNUSED(tex_width);
+    VLC_UNUSED(tex_height);
+    VLC_UNUSED(plane_offset);
+
+    if (!is_cma_buf_pic_chroma(pic->format.i_chroma))
+    {
+        char cbuf[5];
+        msg_Err(tc, "Pic with unexpected chroma: %s", str_fourcc(cbuf, pic->format.i_chroma));
+        return VLC_EGENERIC;
+    }
+
+    cma_buf_t * const cb = cma_buf_pic_get(pic);
+    if (cb == NULL)
+    {
+        msg_Err(tc, "Pic missing cma buf");
+        return VLC_EGENERIC;
+    }
+
+    tex_context_t * const tex = get_tex_context(tc, pic, cb);
+    if (tex == NULL)
+        return VLC_EGENERIC;
+
+//    tc->vt->BindTexture(GL_TEXTURE_EXTERNAL_OES, tex->texture);
+
+    cma_buf_unref(sys->last_cb);
+    sys->last_cb = cma_buf_ref(cb);
+
+    textures[0] = tex->texture;
+    return VLC_SUCCESS;
+}
+
+static int
+tc_mmal_fetch_locations(opengl_tex_converter_t *tc, GLuint program)
+{
+    tc->uloc.Texture[0] = tc->vt->GetUniformLocation(program, "Texture0");
+    return tc->uloc.Texture[0] != -1 ? VLC_SUCCESS : VLC_EGENERIC;
+}
+
+static void
+tc_mmal_prepare_shader(const opengl_tex_converter_t *tc,
+                        const GLsizei *tex_width, const GLsizei *tex_height,
+                        float alpha)
+{
+    (void) tex_width; (void) tex_height; (void) alpha;
+    VLC_UNUSED(tc);
+//    tc->vt->Uniform1i(tc->uloc.Texture[0], 0);
+}
+
+static GLuint
+tc_fragment_shader_init(opengl_tex_converter_t * const tc, const GLenum tex_target,
+                        const vlc_fourcc_t chroma, const video_color_space_t yuv_space)
+{
+    VLC_UNUSED(yuv_space);
+
+    tc->tex_count = 1;
+    tc->tex_target = tex_target;
+    tc->texs[0] = (struct opengl_tex_cfg) {
+        { 1, 1 }, { 1, 1 }, GL_RGB, chroma, GL_UNSIGNED_SHORT  //** ??
+    };
+
+    tc->pf_fetch_locations = tc_mmal_fetch_locations;
+    tc->pf_prepare_shader = tc_mmal_prepare_shader;
+
+
+    const char fs[] =
+       "#extension GL_OES_EGL_image_external : enable\n"
+       "precision mediump float;\n"
+       "uniform samplerExternalOES Texture0;\n"
+       "varying vec2 TexCoord0;\n"
+       "void main() {\n"
+       "  gl_FragColor = texture2D(Texture0, TexCoord0);\n"
+       "}\n";
+
+
+    const char *code = fs;
+
+    GLuint fragment_shader = tc->vt->CreateShader(GL_FRAGMENT_SHADER);
+    tc->vt->ShaderSource(fragment_shader, 1, &code, NULL);
+    tc->vt->CompileShader(fragment_shader);
+    return fragment_shader;
+}
+
+
+static void
+CloseGLConverter(vlc_object_t *obj)
+{
+    opengl_tex_converter_t * const tc = (opengl_tex_converter_t *)obj;
+    mmal_gl_converter_t * const sys = tc->priv;
+
+    if (sys == NULL)
+        return;
+
+    cma_buf_unref(sys->last_cb);
+    cma_vcsm_exit(sys->vcsm_init_type);
+    free(sys);
+}
+
+
+// Pick a chroma that we can convert to
+// Prefer I420 as smallest
+static vlc_fourcc_t chroma_in_out(const vlc_fourcc_t chroma_in)
+{
+    switch (chroma_in)
+    {
+        case VLC_CODEC_MMAL_OPAQUE:
+        case VLC_CODEC_MMAL_ZC_I420:
+        case VLC_CODEC_MMAL_ZC_SAND8:
+        case VLC_CODEC_MMAL_ZC_SAND10:          // ISP only
+            return VLC_CODEC_MMAL_ZC_I420;
+        case VLC_CODEC_MMAL_ZC_SAND30:          // HVS only
+        case VLC_CODEC_MMAL_ZC_RGB32:
+            return VLC_CODEC_MMAL_ZC_RGB32;     // HVS can't generate YUV of any sort
+        default:
+            break;
+    }
+    return 0;
+}
+
+
+static int
+OpenGLConverter(vlc_object_t *obj)
+{
+    opengl_tex_converter_t * const tc = (opengl_tex_converter_t *)obj;
+    int rv = VLC_EGENERIC;
+    const EGLint eglfmt = vlc_to_gl_fourcc(&tc->fmt);
+    const vlc_fourcc_t chroma_out = chroma_in_out(tc->fmt.i_chroma);
+
+    // Do we know what to do with this?
+    if (chroma_out == 0)
+        return rv;
+
+    {
+        char dbuf0[5], dbuf1[5], dbuf2[5];
+        msg_Dbg(tc, "<<< %s: V:%s/E:%s,%dx%d [(%d,%d) %d/%d] sar:%d/%d -> %s", __func__,
+                str_fourcc(dbuf0, tc->fmt.i_chroma),
+                str_fourcc(dbuf1, eglfmt),
+                tc->fmt.i_width, tc->fmt.i_height,
+                tc->fmt.i_x_offset, tc->fmt.i_y_offset,
+                tc->fmt.i_visible_width, tc->fmt.i_visible_height,
+                tc->fmt.i_sar_num, tc->fmt.i_sar_den,
+                str_fourcc(dbuf2, chroma_out));
+    }
+
+    if (tc->gl->ext != VLC_GL_EXT_EGL ||
+        !tc->gl->egl.createImageKHR || !tc->gl->egl.destroyImageKHR)
+    {
+        // Missing an important callback
+        msg_Dbg(tc, "Missing EGL xxxImageKHR calls");
+        return rv;
+    }
+
+    if ((tc->priv = calloc(1, sizeof(mmal_gl_converter_t))) == NULL)
+    {
+        msg_Err(tc, "priv alloc failure");
+        rv = VLC_ENOMEM;
+        goto fail;
+    }
+    mmal_gl_converter_t * const sys = tc->priv;
+
+    sys->drm_fourcc = eglfmt;
+
+    if ((sys->vcsm_init_type = cma_vcsm_init()) != VCSM_INIT_CMA) {
+        msg_Dbg(tc, "VCSM init failed");
+        goto fail;
+    }
+
+    if ((sys->glEGLImageTargetTexture2DOES = vlc_gl_GetProcAddress(tc->gl, "glEGLImageTargetTexture2DOES")) == NULL)
+    {
+        msg_Err(tc, "Failed to bind GL fns");
+        goto fail;
+    }
+
+    if ((tc->fshader = tc_fragment_shader_init(tc, GL_TEXTURE_EXTERNAL_OES,
+                                                   eglfmt == 0 ? VLC_CODEC_RGB32 : tc->fmt.i_chroma,
+                                                   eglfmt == 0 ? COLOR_SPACE_SRGB : tc->fmt.space)) == 0)
+    {
+        msg_Err(tc, "Failed to make shader");
+        goto fail;
+    }
+
+    if (eglfmt == 0)
+    {
+        tc->fmt.i_chroma = chroma_out;
+        tc->fmt.i_bits_per_pixel = 8;
+        if (tc->fmt.i_chroma == VLC_CODEC_MMAL_ZC_RGB32)
+        {
+            tc->fmt.i_rmask = 0xff0000;
+            tc->fmt.i_gmask = 0xff00;
+            tc->fmt.i_bmask = 0xff;
+            tc->fmt.space = COLOR_SPACE_SRGB;
+        }
+        else
+        {
+            tc->fmt.i_rmask = 0;
+            tc->fmt.i_gmask = 0;
+            tc->fmt.i_bmask = 0;
+            tc->fmt.space = COLOR_SPACE_UNDEF;
+        }
+        sys->drm_fourcc = vlc_to_gl_fourcc(&tc->fmt);
+    }
+
+    tc->handle_texs_gen = true;  // We manage the texs
+    tc->pf_update  = tc_mmal_update;
+
+#if TRACE_ALL
+    {
+        char dbuf0[5], dbuf1[5], dbuf2[5];
+        msg_Dbg(tc, ">>> %s: V:%s/E:%s,%dx%d [(%d,%d) %d/%d] sar:%d/%d -> %s", __func__,
+                str_fourcc(dbuf0, tc->fmt.i_chroma),
+                str_fourcc(dbuf1, sys->drm_fourcc),
+                tc->fmt.i_width, tc->fmt.i_height,
+                tc->fmt.i_x_offset, tc->fmt.i_y_offset,
+                tc->fmt.i_visible_width, tc->fmt.i_visible_height,
+                tc->fmt.i_sar_num, tc->fmt.i_sar_den,
+                str_fourcc(dbuf2, chroma_out));
+    }
+#endif
+
+    return VLC_SUCCESS;
+
+fail:
+    CloseGLConverter(obj);
+    return rv;
+}
+
+vlc_module_begin ()
+    set_description("MMAL OpenGL surface converter")
+    set_shortname (N_("MMALGLConverter"))
+    set_capability("glconv", 900)
+    set_callbacks(OpenGLConverter, CloseGLConverter)
+    set_category(CAT_VIDEO)
+    set_subcategory(SUBCAT_VIDEO_VOUT)
+    add_shortcut("mmal_gl_converter")
+vlc_module_end ()
+
--- a/modules/hw/mmal/deinterlace.c
+++ b/modules/hw/mmal/deinterlace.c
@@ -26,11 +26,12 @@
 #include "config.h"
 #endif
 
-#include <vlc_picture_pool.h>
+#include <stdatomic.h>
+
 #include <vlc_common.h>
+#include <vlc_picture_pool.h>
 #include <vlc_plugin.h>
 #include <vlc_filter.h>
-#include <vlc_atomic.h>
 
 #include "mmal_picture.h"
 
@@ -39,468 +40,814 @@
 #include <interface/mmal/util/mmal_util.h>
 #include <interface/mmal/util/mmal_default_components.h>
 
-#define MIN_NUM_BUFFERS_IN_TRANSIT 2
+#define MMAL_DEINTERLACE_NO_QPU "mmal-deinterlace-no-qpu"
+#define MMAL_DEINTERLACE_NO_QPU_TEXT N_("Do not use QPUs for advanced HD deinterlacing.")
+#define MMAL_DEINTERLACE_NO_QPU_LONGTEXT N_("Do not make use of the QPUs to allow higher quality deinterlacing of HD content.")
 
-#define MMAL_DEINTERLACE_QPU "mmal-deinterlace-adv-qpu"
-#define MMAL_DEINTERLACE_QPU_TEXT N_("Use QPUs for advanced HD deinterlacing.")
-#define MMAL_DEINTERLACE_QPU_LONGTEXT N_("Make use of the QPUs to allow higher quality deinterlacing of HD content.")
+#define MMAL_DEINTERLACE_ADV "mmal-deinterlace-adv"
+#define MMAL_DEINTERLACE_ADV_TEXT N_("Force advanced deinterlace")
+#define MMAL_DEINTERLACE_ADV_LONGTEXT N_("Force advanced deinterlace")
 
-static int Open(filter_t *filter);
-static void Close(filter_t *filter);
+#define MMAL_DEINTERLACE_FAST "mmal-deinterlace-fast"
+#define MMAL_DEINTERLACE_FAST_TEXT N_("Force fast deinterlace")
+#define MMAL_DEINTERLACE_FAST_LONGTEXT N_("Force fast deinterlace")
 
-vlc_module_begin()
-    set_shortname(N_("MMAL deinterlace"))
-    set_description(N_("MMAL-based deinterlace filter plugin"))
-    set_capability("video filter", 0)
-    set_category(CAT_VIDEO)
-    set_subcategory(SUBCAT_VIDEO_VFILTER)
-    set_callbacks(Open, Close)
-    add_shortcut("deinterlace")
-    add_bool(MMAL_DEINTERLACE_QPU, false, MMAL_DEINTERLACE_QPU_TEXT,
-                    MMAL_DEINTERLACE_QPU_LONGTEXT, true);
-vlc_module_end()
+#define MMAL_DEINTERLACE_NONE "mmal-deinterlace-none"
+#define MMAL_DEINTERLACE_NONE_TEXT N_("Force no deinterlace")
+#define MMAL_DEINTERLACE_NONE_LONGTEXT N_("Force no interlace. Simply strips off the interlace markers and passes the frame straight through. "\
+    "This is the default for > SD if < 96M gpu-mem")
+
+#define MMAL_DEINTERLACE_HALF_RATE "mmal-deinterlace-half-rate"
+#define MMAL_DEINTERLACE_HALF_RATE_TEXT N_("Halve output framerate")
+#define MMAL_DEINTERLACE_HALF_RATE_LONGTEXT N_("Halve output framerate. 1 output frame for each pair of interlaced fields input")
+
+#define MMAL_DEINTERLACE_FULL_RATE "mmal-deinterlace-full-rate"
+#define MMAL_DEINTERLACE_FULL_RATE_TEXT N_("Full output framerate")
+#define MMAL_DEINTERLACE_FULL_RATE_LONGTEXT N_("Full output framerate. 1 output frame for each interlaced field input")
 
-struct filter_sys_t {
+
+typedef struct filter_sys_t
+{
     MMAL_COMPONENT_T *component;
     MMAL_PORT_T *input;
     MMAL_PORT_T *output;
+    MMAL_POOL_T *in_pool;
+
+    MMAL_QUEUE_T * out_q;
+
+    // Bind this lot somehow into ppr????
+    bool is_cma;
+    cma_buf_pool_t * cma_out_pool;
+    MMAL_POOL_T * out_pool;
+
+    hw_mmal_port_pool_ref_t *out_ppr;
+
+    bool half_rate;
+    bool use_qpu;
+    bool use_fast;
+    bool use_passthrough;
+    unsigned int seq_in;    // Seq of next frame to submit (1-15) [Init=1]
+    unsigned int seq_out;   // Seq of last frame received  (1-15) [Init=15]
 
-    MMAL_QUEUE_T *filtered_pictures;
-    vlc_sem_t sem;
+    vcsm_init_type_t vcsm_init_type;
 
-    atomic_bool started;
+} filter_sys_t;
 
-    /* statistics */
-    int output_in_transit;
-    int input_in_transit;
-};
-
-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
-static picture_t *deinterlace(filter_t *filter, picture_t *picture);
-static void flush(filter_t *filter);
 
 #define MMAL_COMPONENT_DEFAULT_DEINTERLACE "vc.ril.image_fx"
 
-static int Open(filter_t *filter)
+#define TRACE_ALL 0
+
+
+
+// Buffer attached to pic on success, is still valid on failure
+static picture_t * di_alloc_opaque(filter_t * const p_filter, MMAL_BUFFER_HEADER_T * const buf)
 {
-    int32_t frame_duration = filter->fmt_in.video.i_frame_rate != 0 ?
-            (int64_t)1000000 * filter->fmt_in.video.i_frame_rate_base /
-            filter->fmt_in.video.i_frame_rate : 0;
-    bool use_qpu = var_InheritBool(filter, MMAL_DEINTERLACE_QPU);
+    filter_sys_t *const filter_sys = p_filter->p_sys;
+    picture_t * const pic = filter_NewPicture(p_filter);
 
-    MMAL_PARAMETER_IMAGEFX_PARAMETERS_T imfx_param = {
-            { MMAL_PARAMETER_IMAGE_EFFECT_PARAMETERS, sizeof(imfx_param) },
-            MMAL_PARAM_IMAGEFX_DEINTERLACE_ADV,
-            4,
-            { 3, frame_duration, 0, use_qpu }
-    };
+    if (pic == NULL)
+        goto fail1;
 
-    int ret = VLC_SUCCESS;
-    MMAL_STATUS_T status;
-    filter_sys_t *sys;
+    if (buf->length == 0) {
+        msg_Err(p_filter, "%s: Empty buffer", __func__);
+        goto fail2;
+    }
 
-    msg_Dbg(filter, "Try to open mmal_deinterlace filter. frame_duration: %d, QPU %s!",
-            frame_duration, use_qpu ? "used" : "unused");
+    if ((pic->context = hw_mmal_gen_context(buf, filter_sys->out_ppr)) == NULL)
+        goto fail2;
 
-    if (filter->fmt_in.video.i_chroma != VLC_CODEC_MMAL_OPAQUE)
-        return VLC_EGENERIC;
+    buf_to_pic_copy_props(pic, buf);
 
-    if (filter->fmt_out.video.i_chroma != VLC_CODEC_MMAL_OPAQUE)
-        return VLC_EGENERIC;
+#if TRACE_ALL
+    msg_Dbg(p_filter, "pic: prog=%d, tff=%d, date=%lld", pic->b_progressive, pic->b_top_field_first, (long long)pic->date);
+#endif
 
-    sys = calloc(1, sizeof(filter_sys_t));
-    if (!sys)
-        return VLC_ENOMEM;
-    filter->p_sys = sys;
+    return pic;
 
-    bcm_host_init();
+fail2:
+    picture_Release(pic);
+fail1:
+//    mmal_buffer_header_release(buf);
+    return NULL;
+}
 
-    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_DEINTERLACE, &sys->component);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
-                MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
-    }
+static void di_input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+#if TRACE_ALL
+    pic_ctx_mmal_t * ctx = buffer->user_data;
+//    filter_sys_t *const sys = ((filter_t *)port->userdata)->p_sys;
+
+    msg_Dbg((filter_t *)port->userdata, "<<< %s: cmd=%d, ctx=%p, buf=%p, flags=%#x, pts=%lld", __func__, buffer->cmd, ctx, buffer,
+            buffer->flags, (long long)buffer->pts);
+#else
+    VLC_UNUSED(port);
+#endif
 
-    status = mmal_port_parameter_set(sys->component->output[0], &imfx_param.hdr);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to configure MMAL component %s (status=%"PRIx32" %s)",
-                MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
-    }
+    mmal_buffer_header_release(buffer);
 
-    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
-    status = mmal_port_enable(sys->component->control, control_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to enable control port %s (status=%"PRIx32" %s)",
-                sys->component->control->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+#if TRACE_ALL
+    msg_Dbg((filter_t *)port->userdata, ">>> %s", __func__);
+#endif
+}
+
+static void di_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+{
+    if (buf->cmd == 0 && buf->length != 0)
+    {
+        // The filter structure etc. should always exist if we have contents
+        // but might not on later flushes as we shut down
+        filter_t * const p_filter = (filter_t *)port->userdata;
+        filter_sys_t * const sys = p_filter->p_sys;
+
+#if TRACE_ALL
+        msg_Dbg(p_filter, "<<< %s: cmd=%d; flags=%#x, pts=%lld", __func__, buf->cmd, buf->flags, (long long) buf->pts);
+#endif
+        mmal_queue_put(sys->out_q, buf);
+#if TRACE_ALL
+        msg_Dbg(p_filter, ">>> %s: out Q len=%d", __func__, mmal_queue_length(sys->out_q));
+#endif
+        return;
     }
 
-    sys->input = sys->component->input[0];
-    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
-    if (filter->fmt_in.i_codec == VLC_CODEC_MMAL_OPAQUE)
-        sys->input->format->encoding = MMAL_ENCODING_OPAQUE;
-    sys->input->format->es->video.width = filter->fmt_in.video.i_width;
-    sys->input->format->es->video.height = filter->fmt_in.video.i_height;
-    sys->input->format->es->video.crop.x = 0;
-    sys->input->format->es->video.crop.y = 0;
-    sys->input->format->es->video.crop.width = filter->fmt_in.video.i_width;
-    sys->input->format->es->video.crop.height = filter->fmt_in.video.i_height;
-    sys->input->format->es->video.par.num = filter->fmt_in.video.i_sar_num;
-    sys->input->format->es->video.par.den = filter->fmt_in.video.i_sar_den;
+    mmal_buffer_header_reset(buf);   // User data stays intact so release will kill pic
+    mmal_buffer_header_release(buf);
+}
 
-    es_format_Copy(&filter->fmt_out, &filter->fmt_in);
-    filter->fmt_out.video.i_frame_rate *= 2;
 
-    status = mmal_port_format_commit(sys->input);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
-                        sys->input->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
-    }
-    sys->input->buffer_size = sys->input->buffer_size_recommended;
-    sys->input->buffer_num = sys->input->buffer_num_recommended;
 
-    if (filter->fmt_in.i_codec == VLC_CODEC_MMAL_OPAQUE) {
-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
-            1
-        };
+static MMAL_STATUS_T fill_output_from_q(filter_t * const p_filter, filter_sys_t * const sys, MMAL_QUEUE_T * const q)
+{
+    MMAL_BUFFER_HEADER_T * out_buf;
 
-        status = mmal_port_parameter_set(sys->input, &zero_copy.hdr);
-        if (status != MMAL_SUCCESS) {
-           msg_Err(filter, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
-                    sys->input->name, status, mmal_status_to_string(status));
-           goto out;
+    while ((out_buf = mmal_queue_get(q)) != NULL)
+    {
+        MMAL_STATUS_T err;
+        if ((err = mmal_port_send_buffer(sys->output, out_buf)) != MMAL_SUCCESS)
+        {
+            msg_Err(p_filter, "Send buffer to output failed");
+            mmal_queue_put_back(q, out_buf);
+            return err;
         }
     }
+    return MMAL_SUCCESS;
+}
 
-    status = mmal_port_enable(sys->input, input_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to enable input port %s (status=%"PRIx32" %s)",
-                sys->input->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
-    }
+// Output buffers may contain a pic ref on error or flush
+// Free it
+static MMAL_BOOL_T out_buffer_pre_release_cb(MMAL_BUFFER_HEADER_T *header, void *userdata)
+{
+    VLC_UNUSED(userdata);
 
-    sys->output = sys->component->output[0];
-    sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
-    mmal_format_full_copy(sys->output->format, sys->input->format);
+    cma_buf_t * const cb = header->user_data;
+    header->user_data = NULL;
+    cma_buf_unref(cb);  // Copes fine with NULL
 
-    status = mmal_port_format_commit(sys->output);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to commit format for output port %s (status=%"PRIx32" %s)",
-                        sys->input->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+    return MMAL_FALSE;
+}
+
+static inline unsigned int seq_inc(unsigned int x)
+{
+    return x + 1 >= 16 ? 1 : x + 1;
+}
+
+static inline unsigned int seq_delta(unsigned int sseq, unsigned int fseq)
+{
+    return fseq == 0 ? 0 : fseq <= sseq ? sseq - fseq : 15 - (fseq - sseq);
+}
+
+static picture_t *deinterlace(filter_t * p_filter, picture_t * p_pic)
+{
+    filter_sys_t * const sys = p_filter->p_sys;
+    picture_t *ret_pics = NULL;
+    MMAL_STATUS_T err;
+    MMAL_BUFFER_HEADER_T * out_buf = NULL;
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, "<<< %s", __func__);
+#endif
+
+    if (hw_mmal_vlc_pic_to_mmal_fmt_update(sys->input->format, p_pic))
+    {
+        // ****** Breaks on opaque (at least)
+
+        if (sys->input->is_enabled)
+            mmal_port_disable(sys->input);
+#if 0
+        if (sys->output->is_enabled)
+            mmal_port_disable(sys->output);
+
+        mmal_format_full_copy(sys->output->format, sys->input->format);
+        mmal_port_format_commit(sys->output);
+        sys->output->buffer_num = 30;
+        sys->output->buffer_size = sys->input->buffer_size_recommended;
+        mmal_port_enable(sys->output, di_output_port_cb);
+#endif
+        if (mmal_port_format_commit(sys->input) != MMAL_SUCCESS)
+            msg_Err(p_filter, "Failed to update pic format");
+        sys->input->buffer_num = 30;
+        sys->input->buffer_size = sys->input->buffer_size_recommended;
+        mmal_log_dump_format(sys->input->format);
+    }
+
+    // Reenable stuff if the last thing we did was flush
+    // Output should always be enabled
+    if (!sys->input->is_enabled &&
+        (err = mmal_port_enable(sys->input, di_input_port_cb)) != MMAL_SUCCESS)
+    {
+        msg_Err(p_filter, "Input port reenable failed");
+        goto fail;
+    }
+
+    if (!sys->is_cma)
+    {
+        // Fill output from anything that has turned up in pool Q
+        if (hw_mmal_port_pool_ref_fill(sys->out_ppr) != MMAL_SUCCESS)
+        {
+            msg_Err(p_filter, "Out port fill fail");
+            goto fail;
+        }
     }
+    else
+    {
+        // We are expecting one in - one out so simply wedge a new bufer
+        // into the output port.  Flow control will happen on cma alloc.
+
+        if ((out_buf = mmal_queue_get(sys->out_pool->queue)) == NULL)
+        {
+            // Should never happen
+            msg_Err(p_filter, "Failed to get output buffer");
+            goto fail;
+        }
+        mmal_buffer_header_reset(out_buf);
 
-    sys->output->buffer_num = 3;
+        // Attach cma_buf to the buffer & ensure it is freed when the buffer is released
+        // On a good send callback the pic will be extracted to avoid this
+        mmal_buffer_header_pre_release_cb_set(out_buf, out_buffer_pre_release_cb, p_filter);
+
+        cma_buf_t * const cb = cma_buf_pool_alloc_buf(sys->cma_out_pool, sys->output->buffer_size);
+        if ((out_buf->user_data = cb) == NULL)  // Check & attach cb to buf
+        {
+            char dbuf0[5];
+            msg_Err(p_filter, "Failed to alloc CMA buf: fmt=%s, size=%d",
+                    str_fourcc(dbuf0, p_pic->format.i_chroma),
+                    sys->output->buffer_size);
+            goto fail;
+        }
+        const unsigned int vc_h = cma_buf_vc_handle(cb);  // Cannot coerce without going via variable
+        out_buf->data = (uint8_t *)vc_h;
+        out_buf->alloc_size = sys->output->buffer_size;
+
+#if TRACE_ALL
+        msg_Dbg(p_filter, "Out buf send: pic=%p, data=%p, user=%p, flags=%#x, len=%d/%d, pts=%lld",
+                p_pic, out_buf->data, out_buf->user_data, out_buf->flags,
+                out_buf->length, out_buf->alloc_size, (long long)out_buf->pts);
+#endif
 
-    if (filter->fmt_in.i_codec == VLC_CODEC_MMAL_OPAQUE) {
-        MMAL_PARAMETER_UINT32_T extra_buffers = {
-            { MMAL_PARAMETER_EXTRA_BUFFERS, sizeof(MMAL_PARAMETER_UINT32_T) },
-            5
-        };
-        status = mmal_port_parameter_set(sys->output, &extra_buffers.hdr);
-        if (status != MMAL_SUCCESS) {
-            msg_Err(filter, "Failed to set MMAL_PARAMETER_EXTRA_BUFFERS on output port (status=%"PRIx32" %s)",
-                    status, mmal_status_to_string(status));
-            goto out;
+        if ((err = mmal_port_send_buffer(sys->output, out_buf)) != MMAL_SUCCESS)
+        {
+            msg_Err(p_filter, "Send buffer to output failed");
+            goto fail;
         }
+        out_buf = NULL;
+    }
 
-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
-            1
-        };
+    // Stuff into input
+    // We assume the BH is already set up with values reflecting pic date etc.
+    {
+        MMAL_BUFFER_HEADER_T * const pic_buf = hw_mmal_pic_buf_replicated(p_pic, sys->in_pool);
+
+        if (pic_buf == NULL)
+        {
+            msg_Err(p_filter, "Pic has not attached buffer");
+            goto fail;
+        }
 
-        status = mmal_port_parameter_set(sys->output, &zero_copy.hdr);
-        if (status != MMAL_SUCCESS) {
-           msg_Err(filter, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
-                    sys->output->name, status, mmal_status_to_string(status));
-           goto out;
+        picture_Release(p_pic);
+
+        // Add a sequence to the flags so we can track what we have actually
+        // deinterlaced
+        pic_buf->flags = (pic_buf->flags & ~(0xfU * MMAL_BUFFER_HEADER_FLAG_USER0)) | (sys->seq_in * (MMAL_BUFFER_HEADER_FLAG_USER0));
+        sys->seq_in = seq_inc(sys->seq_in);
+
+        if ((err = mmal_port_send_buffer(sys->input, pic_buf)) != MMAL_SUCCESS)
+        {
+            msg_Err(p_filter, "Send buffer to input failed");
+            mmal_buffer_header_release(pic_buf);
+            goto fail;
         }
     }
 
-    status = mmal_port_enable(sys->output, output_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to enable output port %s (status=%"PRIx32" %s)",
-                sys->output->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+    // Return anything that is in the out Q
+    {
+        picture_t ** pp_pic = &ret_pics;
+
+        // Advanced di has a 3 frame latency, so if the seq delta is greater
+        // than that then we are expecting at least two frames of output. Wait
+        // for one of those.
+        // seq_in is seq of the next frame we are going to submit (1-15, no 0)
+        // seq_out is last frame we removed from Q
+        // So after 4 frames sent (1st time we want to wait), 0 rx seq_in=5, seq_out=15, delta=5
+
+        while ((out_buf = (seq_delta(sys->seq_in, sys->seq_out) >= 5 ? mmal_queue_timedwait(sys->out_q, 1000) : mmal_queue_get(sys->out_q))) != NULL)
+        {
+            const unsigned int seq_out = (out_buf->flags / MMAL_BUFFER_HEADER_FLAG_USER0) & 0xf;
+            int rv;
+
+            picture_t * out_pic;
+
+            if (sys->is_cma)
+            {
+                // Alloc pic
+                if ((out_pic = filter_NewPicture(p_filter)) == NULL)
+                {
+                    // Can't alloc pic - just stop extraction
+                    mmal_queue_put_back(sys->out_q, out_buf);
+                    out_buf = NULL;
+                    msg_Warn(p_filter, "Failed to alloc new filter output pic");
+                    break;
+                }
+
+                // Extract cma_buf from buf & attach to pic
+                cma_buf_t * const cb = (cma_buf_t *)out_buf->user_data;
+                if ((rv = cma_buf_pic_attach(cb, out_pic)) != VLC_SUCCESS)
+                {
+                    char dbuf0[5];
+                    msg_Err(p_filter, "Failed to attach CMA to pic: fmt=%s err=%d",
+                            str_fourcc(dbuf0, out_pic->format.i_chroma),
+                            rv);
+                    // cb still attached to buffer and will be freed with it
+                    goto fail;
+                }
+                out_buf->user_data = NULL;
+
+                buf_to_pic_copy_props(out_pic, out_buf);
+
+                // Set pic data pointers from buf aux info now it has it
+                if ((rv = cma_pic_set_data(out_pic, sys->output->format, out_buf)) != VLC_SUCCESS)
+                {
+                    char dbuf0[5];
+                    msg_Err(p_filter, "Failed to set data: fmt=%s, rv=%d",
+                            str_fourcc(dbuf0, sys->output->format->encoding),
+                            rv);
+                }
+
+                out_buf->user_data = NULL;  // Responsability for this pic no longer with buffer
+                mmal_buffer_header_release(out_buf);
+            }
+            else
+            {
+                out_pic = di_alloc_opaque(p_filter, out_buf);
+
+                if (out_pic == NULL) {
+                    msg_Warn(p_filter, "Failed to alloc new filter output pic");
+                    mmal_queue_put_back(sys->out_q, out_buf);  // Wedge buf back into Q in the hope we can alloc a pic later
+                    out_buf = NULL;
+                    break;
+                }
+            }
+            out_buf = NULL;  // Now attached to pic or recycled
+
+#if TRACE_ALL
+            msg_Dbg(p_filter, "-- %s: Q pic=%p: seq_in=%d, seq_out=%d, delta=%d", __func__, out_pic, sys->seq_in, seq_out, seq_delta(sys->seq_in, seq_out));
+#endif
+
+            *pp_pic = out_pic;
+            pp_pic = &out_pic->p_next;
+
+            // Ignore 0 seqs
+            // Don't think these should actually happen
+            if (seq_out != 0)
+                sys->seq_out = seq_out;
+        }
+
+        // Crash on lockup
+        assert(ret_pics != NULL || seq_delta(sys->seq_in, sys->seq_out) < 5);
     }
 
-    status = mmal_component_enable(sys->component);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to enable component %s (status=%"PRIx32" %s)",
-                sys->component->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+#if TRACE_ALL
+    msg_Dbg(p_filter, ">>> %s: pic=%p", __func__, ret_pics);
+#endif
+
+    return ret_pics;
+
+fail:
+    if (out_buf != NULL)
+        mmal_buffer_header_release(out_buf);
+    picture_Release(p_pic);
+    return NULL;
+}
+
+static void di_flush(filter_t *p_filter)
+{
+    filter_sys_t * const sys = p_filter->p_sys;
+
+#if TRACE_ALL
+    msg_Dbg(p_filter, "<<< %s", __func__);
+#endif
+
+    if (sys->input != NULL && sys->input->is_enabled)
+        mmal_port_disable(sys->input);
+
+    if (sys->output != NULL && sys->output->is_enabled)
+    {
+        if (sys->is_cma)
+        {
+            MMAL_BUFFER_HEADER_T * buf;
+            mmal_port_disable(sys->output);
+            while ((buf = mmal_queue_get(sys->out_q)) != NULL)
+                mmal_buffer_header_release(buf);
+        }
+        else
+        {
+            // Wedge anything we've got into the output port as that will free the underlying buffers
+            fill_output_from_q(p_filter, sys, sys->out_q);
+
+            mmal_port_disable(sys->output);
+
+            // If that dumped anything real into the out_q then have another go
+            if (mmal_queue_length(sys->out_q) != 0)
+            {
+                mmal_port_enable(sys->output, di_output_port_cb);
+                fill_output_from_q(p_filter, sys, sys->out_q);
+                mmal_port_disable(sys->output);
+                // Out q should now be empty & should remain so until the input is reenabled
+            }
+        }
+        mmal_port_enable(sys->output, di_output_port_cb);
+
+        // Leaving the input disabled is fine - but we want to leave the output enabled
+        // so we can retrieve buffers that are still bound to pictures
     }
 
-    sys->filtered_pictures = mmal_queue_create();
+    sys->seq_in = 1;
+    sys->seq_out = 15;
 
-    filter->pf_video_filter = deinterlace;
-    filter->pf_flush = flush;
+#if TRACE_ALL
+    msg_Dbg(p_filter, ">>> %s", __func__);
+#endif
+}
 
-    vlc_sem_init(&sys->sem, 0);
 
-out:
-    if (ret != VLC_SUCCESS)
-        Close(filter);
+static void pass_flush(filter_t *p_filter)
+{
+    // Nothing to do
+    VLC_UNUSED(p_filter);
+}
 
-    return ret;
+static picture_t * pass_deinterlace(filter_t * p_filter, picture_t * p_pic)
+{
+    VLC_UNUSED(p_filter);
+
+    p_pic->b_progressive = true;
+    return p_pic;
 }
 
-static void Close(filter_t *filter)
+
+static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
 {
-    filter_sys_t *sys = filter->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer;
+    filter_t *filter = (filter_t *)port->userdata;
+    MMAL_STATUS_T status;
 
-    if (!sys)
+    if (buffer->cmd == MMAL_EVENT_ERROR) {
+        status = *(uint32_t *)buffer->data;
+        msg_Err(filter, "MMAL error %"PRIx32" \"%s\"", status,
+                mmal_status_to_string(status));
+    }
+
+    mmal_buffer_header_reset(buffer);
+    mmal_buffer_header_release(buffer);
+}
+
+static void CloseMmalDeinterlace(filter_t *filter)
+{
+    filter_sys_t * const sys = filter->p_sys;
+
+#if TRACE_ALL
+    msg_Dbg(filter, "<<< %s", __func__);
+#endif
+
+    if (sys == NULL)
         return;
 
-    if (sys->component && sys->component->control->is_enabled)
-        mmal_port_disable(sys->component->control);
+    if (sys->use_passthrough)
+    {
+        free(sys);
+        return;
+    }
 
-    if (sys->input && sys->input->is_enabled)
-        mmal_port_disable(sys->input);
+    di_flush(filter);
 
-    if (sys->output && sys->output->is_enabled)
-        mmal_port_disable(sys->output);
+    if (sys->component && sys->component->control->is_enabled)
+        mmal_port_disable(sys->component->control);
 
     if (sys->component && sys->component->is_enabled)
         mmal_component_disable(sys->component);
 
-    while ((buffer = mmal_queue_get(sys->filtered_pictures))) {
-        picture_t *pic = (picture_t *)buffer->user_data;
-        picture_Release(pic);
+    if (sys->in_pool != NULL)
+        mmal_pool_destroy(sys->in_pool);
+
+    hw_mmal_port_pool_ref_release(sys->out_ppr, false);
+    // Once we exit filter & sys are invalid so mark as such
+    if (sys->output != NULL)
+        sys->output->userdata = NULL;
+
+    if (sys->is_cma)
+    {
+        if (sys->output && sys->output->is_enabled)
+            mmal_port_disable(sys->output);
+
+        cma_buf_pool_deletez(&sys->cma_out_pool);
+
+        if (sys->out_pool != NULL)
+            mmal_pool_destroy(sys->out_pool);
     }
 
-    if (sys->filtered_pictures)
-        mmal_queue_destroy(sys->filtered_pictures);
+    if (sys->out_q != NULL)
+        mmal_queue_destroy(sys->out_q);
 
     if (sys->component)
         mmal_component_release(sys->component);
 
-    vlc_sem_destroy(&sys->sem);
+    cma_vcsm_exit(sys->vcsm_init_type);
+
     free(sys);
+}
+
 
-    bcm_host_deinit();
+static bool is_fmt_valid_in(const vlc_fourcc_t fmt)
+{
+    return fmt == VLC_CODEC_MMAL_OPAQUE ||
+           fmt == VLC_CODEC_MMAL_ZC_I420 ||
+           fmt == VLC_CODEC_MMAL_ZC_SAND8;
 }
 
-static int send_output_buffer(filter_t *filter)
+static int OpenMmalDeinterlace(filter_t *filter)
 {
-    filter_sys_t *sys = filter->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer;
+    int32_t frame_duration = filter->fmt_in.video.i_frame_rate != 0 ?
+            CLOCK_FREQ * filter->fmt_in.video.i_frame_rate_base /
+            filter->fmt_in.video.i_frame_rate : 0;
+
+    int ret = VLC_EGENERIC;
     MMAL_STATUS_T status;
-    picture_t *picture;
-    int ret = 0;
+    filter_sys_t *sys;
+
+    msg_Dbg(filter, "<<< %s", __func__);
+
+    if (!is_fmt_valid_in(filter->fmt_in.video.i_chroma) ||
+        filter->fmt_out.video.i_chroma != filter->fmt_in.video.i_chroma)
+        return VLC_EGENERIC;
 
-    if (!sys->output->is_enabled) {
-        ret = VLC_EGENERIC;
-        goto out;
+    sys = calloc(1, sizeof(filter_sys_t));
+    if (!sys)
+        return VLC_ENOMEM;
+    filter->p_sys = sys;
+
+    sys->seq_in = 1;
+    sys->seq_out = 15;
+    sys->is_cma = is_cma_buf_pic_chroma(filter->fmt_out.video.i_chroma);
+
+    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
+        msg_Err(filter, "VCSM init failed");
+        goto fail;
+    }
+
+    if (rpi_is_model_pi4())
+    {
+        sys->half_rate = true;
+        sys->use_qpu = false;
+        sys->use_fast = true;
+    }
+    else
+    {
+        sys->half_rate = false;
+        sys->use_qpu = true;
+        sys->use_fast = false;
+    }
+    sys->use_passthrough = false;
+
+    if (filter->fmt_in.video.i_width * filter->fmt_in.video.i_height > 768 * 576)
+    {
+        // We get stressed if we have to try too hard - so make life easier
+        sys->half_rate = true;
+        // Also check we actually have enough memory to do this
+        // Memory always comes from GPU if Opaque
+        // Assume we have plenty of memory if it comes from CMA
+        if ((!sys->is_cma || sys->vcsm_init_type == VCSM_INIT_LEGACY) &&
+            hw_mmal_get_gpu_mem() < (96 << 20))
+        {
+            sys->use_passthrough = true;
+            msg_Warn(filter, "Deinterlace bypassed due to lack of GPU memory");
+        }
     }
 
-    picture = filter_NewPicture(filter);
-    if (!picture) {
-        msg_Warn(filter, "Failed to get new picture");
-        ret = -1;
-        goto out;
+    if (var_InheritBool(filter, MMAL_DEINTERLACE_NO_QPU))
+        sys->use_qpu = false;
+    if (var_InheritBool(filter, MMAL_DEINTERLACE_ADV))
+    {
+        sys->use_fast = false;
+        sys->use_passthrough = false;
+    }
+    if (var_InheritBool(filter, MMAL_DEINTERLACE_FAST))
+    {
+        sys->use_fast = true;
+        sys->use_passthrough = false;
+    }
+    if (var_InheritBool(filter, MMAL_DEINTERLACE_NONE))
+        sys->use_passthrough = true;
+    if (var_InheritBool(filter, MMAL_DEINTERLACE_FULL_RATE))
+        sys->half_rate = false;
+    if (var_InheritBool(filter, MMAL_DEINTERLACE_HALF_RATE))
+        sys->half_rate = true;
+
+    if (sys->use_passthrough)
+    {
+        filter->pf_video_filter = pass_deinterlace;
+        filter->pf_flush = pass_flush;
+        // Don't need VCSM - get rid of it now
+        cma_vcsm_exit(sys->vcsm_init_type);
+        sys->vcsm_init_type = VCSM_INIT_NONE;
+        return 0;
+    }
+
+    {
+        char dbuf0[5], dbuf1[5];
+        msg_Dbg(filter, "%s: %s,%dx%d [(%d,%d) %d/%d] -> %s,%dx%d [(%d,%d) %dx%d]: %s %s %s", __func__,
+                str_fourcc(dbuf0, filter->fmt_in.video.i_chroma),
+                filter->fmt_in.video.i_width, filter->fmt_in.video.i_height,
+                filter->fmt_in.video.i_x_offset, filter->fmt_in.video.i_y_offset,
+                filter->fmt_in.video.i_visible_width, filter->fmt_in.video.i_visible_height,
+                str_fourcc(dbuf1, filter->fmt_out.video.i_chroma),
+                filter->fmt_out.video.i_width, filter->fmt_out.video.i_height,
+                filter->fmt_out.video.i_x_offset, filter->fmt_out.video.i_y_offset,
+                filter->fmt_out.video.i_visible_width, filter->fmt_out.video.i_visible_height,
+                sys->use_qpu ? "QPU" : "VPU",
+                sys->use_fast ? "FAST" : "ADV",
+                sys->use_passthrough ? "PASS" : sys->half_rate ? "HALF" : "FULL");
     }
-    picture->format.i_frame_rate = filter->fmt_out.video.i_frame_rate;
-    picture->format.i_frame_rate_base = filter->fmt_out.video.i_frame_rate_base;
 
-    buffer = picture->p_sys->buffer;
-    buffer->user_data = picture;
-    buffer->cmd = 0;
+    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_DEINTERLACE, &sys->component);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(filter, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
+                MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
+        goto fail;
+    }
 
-    mmal_picture_lock(picture);
+    {
+        const MMAL_PARAMETER_IMAGEFX_PARAMETERS_T imfx_param = {
+            { MMAL_PARAMETER_IMAGE_EFFECT_PARAMETERS, sizeof(imfx_param) },
+            sys->use_fast ?
+                MMAL_PARAM_IMAGEFX_DEINTERLACE_FAST :
+                MMAL_PARAM_IMAGEFX_DEINTERLACE_ADV,
+            4,
+            { 5 /* Frame type: mixed */, frame_duration, sys->half_rate, sys->use_qpu }
+        };
 
-    status = mmal_port_send_buffer(sys->output, buffer);
+        status = mmal_port_parameter_set(sys->component->output[0], &imfx_param.hdr);
+        if (status != MMAL_SUCCESS) {
+            msg_Err(filter, "Failed to configure MMAL component %s (status=%"PRIx32" %s)",
+                    MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
+            goto fail;
+        }
+    }
+
+    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
+    status = mmal_port_enable(sys->component->control, control_port_cb);
     if (status != MMAL_SUCCESS) {
-        msg_Err(filter, "Failed to send buffer to output port (status=%"PRIx32" %s)",
-                status, mmal_status_to_string(status));
-        mmal_buffer_header_release(buffer);
-        picture_Release(picture);
-        ret = -1;
-    } else {
-        atomic_fetch_add(&sys->output_in_transit, 1);
-        vlc_sem_post(&sys->sem);
+        msg_Err(filter, "Failed to enable control port %s (status=%"PRIx32" %s)",
+                sys->component->control->name, status, mmal_status_to_string(status));
+        goto fail;
     }
 
-out:
-    return ret;
-}
+    sys->input = sys->component->input[0];
+    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
+    sys->input->format->encoding = vlc_to_mmal_video_fourcc(&filter->fmt_in.video);
+    hw_mmal_vlc_fmt_to_mmal_fmt(sys->input->format, &filter->fmt_in.video);
 
-static void fill_output_port(filter_t *filter)
-{
-    filter_sys_t *sys = filter->p_sys;
-    /* allow at least 2 buffers in transit */
-    unsigned max_buffers_in_transit = __MAX(2, MIN_NUM_BUFFERS_IN_TRANSIT);
-    int buffers_available = sys->output->buffer_num -
-        atomic_load(&sys->output_in_transit) -
-        mmal_queue_length(sys->filtered_pictures);
-    int buffers_to_send = max_buffers_in_transit - sys->output_in_transit;
-    int i;
+    es_format_Copy(&filter->fmt_out, &filter->fmt_in);
+    if (!sys->half_rate)
+        filter->fmt_out.video.i_frame_rate *= 2;
 
-    if (buffers_to_send > buffers_available)
-        buffers_to_send = buffers_available;
+    status = mmal_port_format_commit(sys->input);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(filter, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
+                        sys->input->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+    sys->input->buffer_size = sys->input->buffer_size_recommended;
+    sys->input->buffer_num = 30;
+//    sys->input->buffer_num = sys->input->buffer_num_recommended;
 
-#ifndef NDEBUG
-    msg_Dbg(filter, "Send %d buffers to output port (available: %d, in_transit: %d, buffer_num: %d)",
-                    buffers_to_send, buffers_available, sys->output_in_transit,
-                    sys->output->buffer_num);
-#endif
-    for (i = 0; i < buffers_to_send; ++i) {
-        if (send_output_buffer(filter) < 0)
-            break;
+    if ((sys->in_pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)
+    {
+        msg_Err(filter, "Failed to create input pool");
+        goto fail;
     }
-}
 
-static picture_t *deinterlace(filter_t *filter, picture_t *picture)
-{
-    filter_sys_t *sys = filter->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer;
-    picture_t *out_picture = NULL;
-    picture_t *ret = NULL;
-    MMAL_STATUS_T status;
-    unsigned i = 0;
+    status = port_parameter_set_bool(sys->input, MMAL_PARAMETER_ZERO_COPY, true);
+    if (status != MMAL_SUCCESS) {
+       msg_Err(filter, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
+                sys->input->name, status, mmal_status_to_string(status));
+       goto fail;
+    }
 
-    fill_output_port(filter);
+    status = mmal_port_enable(sys->input, di_input_port_cb);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(filter, "Failed to enable input port %s (status=%"PRIx32" %s)",
+                sys->input->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
 
-    buffer = picture->p_sys->buffer;
-    buffer->user_data = picture;
-    buffer->pts = picture->date;
-    buffer->cmd = 0;
 
-    if (!picture->p_sys->displayed) {
-        status = mmal_port_send_buffer(sys->input, buffer);
-        if (status != MMAL_SUCCESS) {
-            msg_Err(filter, "Failed to send buffer to input port (status=%"PRIx32" %s)",
-                    status, mmal_status_to_string(status));
-            picture_Release(picture);
-        } else {
-            picture->p_sys->displayed = true;
-            atomic_fetch_add(&sys->input_in_transit, 1);
-            vlc_sem_post(&sys->sem);
-        }
-    } else {
-        picture_Release(picture);
-    }
-
-    /*
-     * Send output buffers
-     */
-    while(atomic_load(&sys->started) && i < 2) {
-        if (buffer = mmal_queue_timedwait(sys->filtered_pictures, 2000)) {
-            i++;
-            if (!out_picture) {
-                out_picture = (picture_t *)buffer->user_data;
-                ret = out_picture;
-            } else {
-                out_picture->p_next = (picture_t *)buffer->user_data;
-                out_picture = out_picture->p_next;
-            }
-            out_picture->date = buffer->pts;
-        } else {
-            msg_Dbg(filter, "Failed waiting for filtered picture");
-            break;
-        }
+    if ((sys->out_q = mmal_queue_create()) == NULL)
+    {
+        msg_Err(filter, "Failed to create out Q");
+        goto fail;
     }
-    if (out_picture)
-        out_picture->p_next = NULL;
 
-    return ret;
-}
-
-static void flush(filter_t *filter)
-{
-    filter_sys_t *sys = filter->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer;
+    sys->output = sys->component->output[0];
+    mmal_format_full_copy(sys->output->format, sys->input->format);
 
-    msg_Dbg(filter, "flush deinterlace filter");
+    if (!sys->is_cma)
+    {
+        if ((status = hw_mmal_opaque_output(VLC_OBJECT(filter), &sys->out_ppr, sys->output, 5, di_output_port_cb)) != MMAL_SUCCESS)
+            goto fail;
+    }
+    else
+    {
+        // CMA stuff
+        sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
+
+        if ((sys->cma_out_pool = cma_buf_pool_new(8, 8, true, "deinterlace")) == NULL)
+        {
+            msg_Err(filter, "Failed to alloc cma buf pool");
+            goto fail;
+        }
 
-    msg_Dbg(filter, "flush: flush ports (input: %d, output: %d in transit)",
-            sys->input_in_transit, sys->output_in_transit);
-    mmal_port_flush(sys->output);
-    mmal_port_flush(sys->input);
-
-    msg_Dbg(filter, "flush: wait for all buffers to be returned");
-    while (atomic_load(&sys->input_in_transit) ||
-            atomic_load(&sys->output_in_transit))
-        vlc_sem_wait(&sys->sem);
-
-    while ((buffer = mmal_queue_get(sys->filtered_pictures))) {
-        picture_t *pic = (picture_t *)buffer->user_data;
-        msg_Dbg(filter, "flush: release already filtered pic %p",
-                (void *)pic);
-        picture_Release(pic);
-    }
-    atomic_store(&sys->started, false);
-    msg_Dbg(filter, "flush: done");
-}
+        // Rate control done by CMA in flight logic, so have "inexhaustable" pool here
+        if ((sys->out_pool = mmal_pool_create(30, 0)) == NULL)
+        {
+            msg_Err(filter, "Failed to alloc out pool");
+            goto fail;
+        }
 
-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
-{
-    filter_t *filter = (filter_t *)port->userdata;
-    MMAL_STATUS_T status;
+        port_parameter_set_bool(sys->output, MMAL_PARAMETER_ZERO_COPY, true);
 
-    if (buffer->cmd == MMAL_EVENT_ERROR) {
-        status = *(uint32_t *)buffer->data;
-        msg_Err(filter, "MMAL error %"PRIx32" \"%s\"", status,
-                mmal_status_to_string(status));
-    }
+        if ((status = mmal_port_format_commit(sys->output)) != MMAL_SUCCESS)
+        {
+            msg_Err(filter, "Output port format commit failed");
+            goto fail;
+        }
 
-    mmal_buffer_header_release(buffer);
-}
+        sys->output->buffer_num = 30;
+        sys->output->buffer_size = sys->output->buffer_size_recommended;
 
-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
-{
-    picture_t *picture = (picture_t *)buffer->user_data;
-    filter_t *filter = (filter_t *)port->userdata;
-    filter_sys_t *sys = filter->p_sys;
+        // CB just drops all bufs into out_q
+        if ((status = mmal_port_enable(sys->output, di_output_port_cb)) != MMAL_SUCCESS)
+        {
+            msg_Err(filter, "Failed to enable output port %s (status=%"PRIx32" %s)",
+                    sys->output->name, status, mmal_status_to_string(status));
+            goto fail;
+        }
+    }
 
-    if (picture) {
-        picture_Release(picture);
-    } else {
-        msg_Warn(filter, "Got buffer without picture on input port - OOOPS");
-        mmal_buffer_header_release(buffer);
+    status = mmal_component_enable(sys->component);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(filter, "Failed to enable component %s (status=%"PRIx32" %s)",
+                sys->component->name, status, mmal_status_to_string(status));
+        goto fail;
     }
 
-    atomic_fetch_sub(&sys->input_in_transit, 1);
-    vlc_sem_post(&sys->sem);
+    filter->pf_video_filter = deinterlace;
+    filter->pf_flush = di_flush;
+    return 0;
+
+fail:
+    CloseMmalDeinterlace(filter);
+    return ret;
 }
 
-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
-{
-    filter_t *filter = (filter_t *)port->userdata;
-    filter_sys_t *sys = filter->p_sys;
-    picture_t *picture;
+vlc_module_begin()
+    set_shortname(N_("MMAL deinterlace"))
+    set_description(N_("MMAL-based deinterlace filter plugin"))
+    set_capability("video filter", 900)
+    set_category(CAT_VIDEO)
+    set_subcategory(SUBCAT_VIDEO_VFILTER)
+    set_callbacks(OpenMmalDeinterlace, CloseMmalDeinterlace)
+    add_shortcut("deinterlace")
+    add_bool(MMAL_DEINTERLACE_NO_QPU, false, MMAL_DEINTERLACE_NO_QPU_TEXT,
+                    MMAL_DEINTERLACE_NO_QPU_LONGTEXT, true);
+    add_bool(MMAL_DEINTERLACE_ADV, false, MMAL_DEINTERLACE_ADV_TEXT,
+                    MMAL_DEINTERLACE_ADV_LONGTEXT, true);
+    add_bool(MMAL_DEINTERLACE_FAST, false, MMAL_DEINTERLACE_FAST_TEXT,
+                    MMAL_DEINTERLACE_FAST_LONGTEXT, true);
+    add_bool(MMAL_DEINTERLACE_NONE, false, MMAL_DEINTERLACE_NONE_TEXT,
+                    MMAL_DEINTERLACE_NONE_LONGTEXT, true);
+    add_bool(MMAL_DEINTERLACE_HALF_RATE, false, MMAL_DEINTERLACE_HALF_RATE_TEXT,
+                    MMAL_DEINTERLACE_HALF_RATE_LONGTEXT, true);
+    add_bool(MMAL_DEINTERLACE_FULL_RATE, false, MMAL_DEINTERLACE_FULL_RATE_TEXT,
+                    MMAL_DEINTERLACE_FULL_RATE_LONGTEXT, true);
+
+vlc_module_end()
+
 
-    if (buffer->cmd == 0) {
-        if (buffer->length > 0) {
-            atomic_store(&sys->started, true);
-            mmal_queue_put(sys->filtered_pictures, buffer);
-            picture = (picture_t *)buffer->user_data;
-        } else {
-            picture = (picture_t *)buffer->user_data;
-            picture_Release(picture);
-        }
-
-        atomic_fetch_sub(&sys->output_in_transit, 1);
-        vlc_sem_post(&sys->sem);
-    } else if (buffer->cmd == MMAL_EVENT_FORMAT_CHANGED) {
-        msg_Warn(filter, "MMAL_EVENT_FORMAT_CHANGED seen but not handled");
-        mmal_buffer_header_release(buffer);
-    } else {
-        mmal_buffer_header_release(buffer);
-    }
-}
--- /dev/null
+++ b/modules/hw/mmal/mmal_avcodec.c
@@ -0,0 +1,2172 @@
+/*****************************************************************************
+ * video.c: video decoder using the libavcodec library
+ *****************************************************************************
+ * Copyright (C) 1999-2001 VLC authors and VideoLAN
+ * $Id$
+ *
+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
+ *          Gildas Bazin <gbazin@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "config.h"
+
+#include <vlc_common.h>
+#include <vlc_codec.h>
+#include <vlc_avcodec.h>
+#include <vlc_cpu.h>
+#include <vlc_atomic.h>
+#include <assert.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavutil/mem.h>
+#include <libavutil/pixdesc.h>
+#if (LIBAVUTIL_VERSION_MICRO >= 100 && LIBAVUTIL_VERSION_INT >= AV_VERSION_INT( 55, 16, 101 ) )
+#include <libavutil/mastering_display_metadata.h>
+#endif
+
+//#include "avcodec.h"
+//#include "va.h"
+
+#include <vlc_plugin.h>
+#include <libavutil/rpi_sand_fns.h>
+#include <libavcodec/rpi_zc.h>
+#include "../../codec/cc.h"
+#include "../../codec/avcodec/avcommon.h"  // ??? Beware over inclusion
+#include "mmal_cma.h"
+#include "mmal_picture.h"
+
+#define TRACE_ALL 0
+
+#define BUFFERS_IN_FLIGHT       5       // Default max value for in flight buffers
+#define BUFFERS_IN_FLIGHT_UHD   3       // Fewer if very big
+
+#define MMAL_AVCODEC_BUFFERS "mmal-avcodec-buffers"
+#define MMAL_AVCODEC_BUFFERS_TEXT N_("In flight buffer count before blocking.")
+#define MMAL_AVCODEC_BUFFERS_LONGTEXT N_("In flight buffer count before blocking. " \
+"Beware that incautious changing of this can lead to lockup. " \
+"Zero will disable the module.")
+
+
+// Fwd declarations required due to wanting to avoid reworking the original
+// code too much
+static void MmalAvcodecCloseDecoder( vlc_object_t *obj );
+
+
+/*****************************************************************************
+ * decoder_sys_t : decoder descriptor
+ *****************************************************************************/
+struct decoder_sys_t
+{
+    AVCodecContext *p_context;
+    const AVCodec  *p_codec;
+
+    /* Video decoder specific part */
+    date_t  pts;
+
+    /* Closed captions for decoders */
+    cc_data_t cc;
+
+    /* for frame skipping algo */
+    bool b_hurry_up;
+    bool b_show_corrupted;
+    bool b_from_preroll;
+    enum AVDiscard i_skip_frame;
+
+    /* how many decoded frames are late */
+    int     i_late_frames;
+    mtime_t i_late_frames_start;
+    mtime_t i_last_late_delay;
+
+    /* for direct rendering */
+    bool        b_direct_rendering;
+    atomic_bool b_dr_failure;
+
+    /* Hack to force display of still pictures */
+    bool b_first_frame;
+
+
+    /* */
+    bool palette_sent;
+
+    /* VA API */
+//    vlc_va_t *p_va;
+    enum PixelFormat pix_fmt;
+    int profile;
+    int level;
+
+    vlc_sem_t sem_mt;
+
+    // Rpi vars
+    cma_buf_pool_t * cma_pool;
+    bool pool_alloc_1;
+    vcsm_init_type_t vcsm_init_type;
+    int cma_in_flight_max;
+    // Debug
+    decoder_t * p_dec;
+};
+
+
+static vlc_fourcc_t
+ZcFindVlcChroma(const int i_ffmpeg_chroma)
+{
+    switch (i_ffmpeg_chroma)
+    {
+        // This is all we claim to deal with
+        // In theory RGB should be doable within our current framework
+        case AV_PIX_FMT_YUV420P:
+            return VLC_CODEC_MMAL_ZC_I420;
+        case AV_PIX_FMT_SAND128:
+        case AV_PIX_FMT_RPI4_8:
+            return VLC_CODEC_MMAL_ZC_SAND8;
+        case AV_PIX_FMT_SAND64_10:
+            return VLC_CODEC_MMAL_ZC_SAND10;
+        case AV_PIX_FMT_RPI4_10:
+            return VLC_CODEC_MMAL_ZC_SAND30;
+        default:
+            break;
+    }
+    return 0;
+}
+
+// Pix Fmt conv for MMal
+// video_fromat from ffmpeg pic_fmt
+static int
+ZcGetVlcChroma( video_format_t *fmt, int i_ffmpeg_chroma )
+{
+    fmt->i_rmask = 0;
+    fmt->i_gmask = 0;
+    fmt->i_bmask = 0;
+    fmt->i_chroma = ZcFindVlcChroma(i_ffmpeg_chroma);
+
+    return fmt->i_chroma == 0 ? -1 : 0;
+}
+
+
+// Format chooser is way simpler than vlc
+static enum PixelFormat
+ZcGetFormat(AVCodecContext *p_context, const enum PixelFormat *pi_fmt)
+{
+    enum PixelFormat swfmt = avcodec_default_get_format(p_context, pi_fmt);
+    for (size_t i = 0; pi_fmt[i] != AV_PIX_FMT_NONE; i++)
+    {
+        if (ZcFindVlcChroma(pi_fmt[i]) != 0)
+            return pi_fmt[i];
+    }
+    return swfmt;
+}
+
+
+static void cma_avbuf_pool_free(void * v)
+{
+    cma_buf_unref(v);
+}
+
+static unsigned int zc_buf_vcsm_handle(void * v)
+{
+    return cma_buf_vcsm_handle(v);
+}
+
+static unsigned int zc_buf_vc_handle(void * v)
+{
+    return cma_buf_vc_handle(v);
+}
+
+static void * zc_buf_map_arm(void * v)
+{
+    return cma_buf_addr(v);
+}
+
+static unsigned int zc_buf_map_vc(void * v)
+{
+    return cma_buf_vc_addr(v);
+}
+
+
+
+static const av_rpi_zc_buf_fn_tab_t zc_buf_fn_tab = {
+    .free = cma_avbuf_pool_free,
+
+    .vcsm_handle = zc_buf_vcsm_handle,
+    .vc_handle = zc_buf_vc_handle,
+    .map_arm = zc_buf_map_arm,
+    .map_vc = zc_buf_map_vc
+};
+
+
+static AVBufferRef *
+zc_alloc_buf(void * v, size_t size, const AVRpiZcFrameGeometry * geo)
+{
+    decoder_t * const dec = v;
+    decoder_sys_t * const sys = dec->p_sys;
+
+    VLC_UNUSED(geo);
+
+    assert(sys != NULL);
+
+    const unsigned int dec_pool_req = av_rpi_zc_get_decoder_pool_size(sys->p_context->opaque);
+    if (dec_pool_req != 0)
+    {
+        cma_buf_pool_resize(sys->cma_pool, dec_pool_req + sys->cma_in_flight_max, sys->cma_in_flight_max);
+
+        if (!sys->pool_alloc_1)
+        {
+            sys->pool_alloc_1 = true;
+            msg_Dbg(dec, "Pool size: (%d+%d) * %zd", dec_pool_req, sys->cma_in_flight_max, size);
+            if (cma_buf_pool_fill(sys->cma_pool, size) != 0)
+                msg_Warn(dec, "Failed to preallocate decoder pool (%d+%d) * %zd", dec_pool_req, sys->cma_in_flight_max, size);
+        }
+    }
+
+    void * const cmabuf = cma_buf_pool_alloc_buf(sys->cma_pool, size);
+
+    if (cmabuf == NULL)
+    {
+        msg_Err(dec, "CMA buf pool alloc buf failed");
+        return NULL;
+    }
+
+    AVBufferRef *const avbuf = av_rpi_zc_buf(cma_buf_size(cmabuf), 0, cmabuf, &zc_buf_fn_tab);
+
+    if (avbuf == NULL)
+    {
+        msg_Err(dec, "av_rpi_zc_buf failed");
+        cma_buf_unref(cmabuf);
+        return NULL;
+    }
+
+    return avbuf;
+}
+
+static void
+zc_free_pool(void * v)
+{
+    decoder_t * const dec = v;
+    cma_buf_pool_delete(dec->p_sys->cma_pool);
+}
+
+
+static const uint8_t shift_01[] = {0,1,1,1};
+static const uint8_t pb_1[] = {1,1,1,1};
+static const uint8_t pb_12[] = {1,2,2,2};
+static const uint8_t pb_24[] = {2,4,4,4};
+static const uint8_t pb_4[] = {4,4,4,4};
+
+static int set_pic_from_frame(picture_t * const pic, const AVFrame * const frame)
+{
+    const uint8_t * hs = shift_01;
+    const uint8_t * ws = shift_01;
+    const uint8_t * pb = pb_1;
+
+    switch (pic->format.i_chroma)
+    {
+        case VLC_CODEC_MMAL_ZC_RGB32:
+            pic->i_planes = 1;
+            pb = pb_4;
+            break;
+        case VLC_CODEC_MMAL_ZC_I420:
+            pic->i_planes = 3;
+            break;
+        case VLC_CODEC_MMAL_ZC_SAND8:
+            pic->i_planes = 2;
+            pb = pb_12;
+            break;
+        case VLC_CODEC_MMAL_ZC_SAND10:
+        case VLC_CODEC_MMAL_ZC_SAND30:  // Lies: SAND30 is "special"
+            pic->i_planes = 2;
+            pb = pb_24;
+            break;
+        default:
+            return VLC_EGENERIC;
+    }
+
+    const cma_buf_t * const cb = cma_buf_pic_get(pic);
+    uint8_t * const data = cma_buf_addr(cb);
+    if (data == NULL) {
+        return VLC_ENOMEM;
+    }
+
+    uint8_t * frame_end = frame->data[0] + cma_buf_size(cb);
+    for (int i = 0; i != pic->i_planes; ++i) {
+        // Calculate lines from gap between planes
+        // This will give us an accurate "height" for later use by MMAL
+        const int lines = ((i + 1 == pic->i_planes ? frame_end : frame->data[i + 1]) -
+                           frame->data[i]) / frame->linesize[i];
+        pic->p[i] = (plane_t){
+            .p_pixels = data + (frame->data[i] - frame->data[0]),
+            .i_lines = lines,
+            .i_pitch = frame->linesize[i],
+            .i_pixel_pitch = pb[i],
+            .i_visible_lines = av_frame_cropped_height(frame) >> hs[i],
+            .i_visible_pitch = av_frame_cropped_width(frame) >> ws[i]
+        };
+    }
+    return 0;
+}
+
+
+//============================================================================
+//
+// Nicked from avcodec/fourcc.c
+//
+// * Really we should probably use that directly
+
+/*
+ * Video Codecs
+ */
+
+struct vlc_avcodec_fourcc
+{
+    vlc_fourcc_t i_fourcc;
+    unsigned i_codec;
+};
+
+
+static const struct vlc_avcodec_fourcc video_codecs[] =
+{
+    { VLC_CODEC_MP1V, AV_CODEC_ID_MPEG1VIDEO },
+    { VLC_CODEC_MP2V, AV_CODEC_ID_MPEG2VIDEO }, /* prefer MPEG2 over MPEG1 */
+    { VLC_CODEC_MPGV, AV_CODEC_ID_MPEG2VIDEO }, /* prefer MPEG2 over MPEG1 */
+    /* AV_CODEC_ID_MPEG2VIDEO_XVMC */
+    { VLC_CODEC_H261, AV_CODEC_ID_H261 },
+    { VLC_CODEC_H263, AV_CODEC_ID_H263 },
+    { VLC_CODEC_RV10, AV_CODEC_ID_RV10 },
+    { VLC_CODEC_RV13, AV_CODEC_ID_RV10 },
+    { VLC_CODEC_RV20, AV_CODEC_ID_RV20 },
+    { VLC_CODEC_MJPG, AV_CODEC_ID_MJPEG },
+    { VLC_CODEC_MJPGB, AV_CODEC_ID_MJPEGB },
+    { VLC_CODEC_LJPG, AV_CODEC_ID_LJPEG },
+    { VLC_CODEC_SP5X, AV_CODEC_ID_SP5X },
+    { VLC_CODEC_JPEGLS, AV_CODEC_ID_JPEGLS },
+    { VLC_CODEC_MP4V, AV_CODEC_ID_MPEG4 },
+    /* AV_CODEC_ID_RAWVIDEO */
+    { VLC_CODEC_DIV1, AV_CODEC_ID_MSMPEG4V1 },
+    { VLC_CODEC_DIV2, AV_CODEC_ID_MSMPEG4V2 },
+    { VLC_CODEC_DIV3, AV_CODEC_ID_MSMPEG4V3 },
+    { VLC_CODEC_WMV1, AV_CODEC_ID_WMV1 },
+    { VLC_CODEC_WMV2, AV_CODEC_ID_WMV2 },
+    { VLC_CODEC_H263P, AV_CODEC_ID_H263P },
+    { VLC_CODEC_H263I, AV_CODEC_ID_H263I },
+    { VLC_CODEC_FLV1, AV_CODEC_ID_FLV1 },
+    { VLC_CODEC_SVQ1, AV_CODEC_ID_SVQ1 },
+    { VLC_CODEC_SVQ3, AV_CODEC_ID_SVQ3 },
+    { VLC_CODEC_DV, AV_CODEC_ID_DVVIDEO },
+    { VLC_CODEC_HUFFYUV, AV_CODEC_ID_HUFFYUV },
+    { VLC_CODEC_CYUV, AV_CODEC_ID_CYUV },
+    { VLC_CODEC_H264, AV_CODEC_ID_H264 },
+    { VLC_CODEC_INDEO3, AV_CODEC_ID_INDEO3 },
+    { VLC_CODEC_VP3, AV_CODEC_ID_VP3 },
+    { VLC_CODEC_THEORA, AV_CODEC_ID_THEORA },
+#if ( !defined( WORDS_BIGENDIAN ) )
+    /* Asus Video (Another thing that doesn't work on PPC) */
+    { VLC_CODEC_ASV1, AV_CODEC_ID_ASV1 },
+    { VLC_CODEC_ASV2, AV_CODEC_ID_ASV2 },
+#endif
+    { VLC_CODEC_FFV1, AV_CODEC_ID_FFV1 },
+    { VLC_CODEC_4XM, AV_CODEC_ID_4XM },
+    { VLC_CODEC_VCR1, AV_CODEC_ID_VCR1 },
+    { VLC_CODEC_CLJR, AV_CODEC_ID_CLJR },
+    { VLC_CODEC_MDEC, AV_CODEC_ID_MDEC },
+    { VLC_CODEC_ROQ, AV_CODEC_ID_ROQ },
+    { VLC_CODEC_INTERPLAY, AV_CODEC_ID_INTERPLAY_VIDEO },
+    { VLC_CODEC_XAN_WC3, AV_CODEC_ID_XAN_WC3 },
+    { VLC_CODEC_XAN_WC4, AV_CODEC_ID_XAN_WC4 },
+    { VLC_CODEC_RPZA, AV_CODEC_ID_RPZA },
+    { VLC_CODEC_CINEPAK, AV_CODEC_ID_CINEPAK },
+    { VLC_CODEC_WS_VQA, AV_CODEC_ID_WS_VQA },
+    { VLC_CODEC_MSRLE, AV_CODEC_ID_MSRLE },
+    { VLC_CODEC_MSVIDEO1, AV_CODEC_ID_MSVIDEO1 },
+    { VLC_CODEC_IDCIN, AV_CODEC_ID_IDCIN },
+    { VLC_CODEC_8BPS, AV_CODEC_ID_8BPS },
+    { VLC_CODEC_SMC, AV_CODEC_ID_SMC },
+    { VLC_CODEC_FLIC, AV_CODEC_ID_FLIC },
+    { VLC_CODEC_TRUEMOTION1, AV_CODEC_ID_TRUEMOTION1 },
+    { VLC_CODEC_VMDVIDEO, AV_CODEC_ID_VMDVIDEO },
+    { VLC_CODEC_LCL_MSZH, AV_CODEC_ID_MSZH },
+    { VLC_CODEC_LCL_ZLIB, AV_CODEC_ID_ZLIB },
+    { VLC_CODEC_QTRLE, AV_CODEC_ID_QTRLE },
+    { VLC_CODEC_TSCC, AV_CODEC_ID_TSCC },
+    { VLC_CODEC_ULTI, AV_CODEC_ID_ULTI },
+    { VLC_CODEC_QDRAW, AV_CODEC_ID_QDRAW },
+    { VLC_CODEC_VIXL, AV_CODEC_ID_VIXL },
+    { VLC_CODEC_QPEG, AV_CODEC_ID_QPEG },
+    { VLC_CODEC_PNG, AV_CODEC_ID_PNG },
+    { VLC_CODEC_PPM, AV_CODEC_ID_PPM },
+    /* AV_CODEC_ID_PBM */
+    { VLC_CODEC_PGM, AV_CODEC_ID_PGM },
+    { VLC_CODEC_PGMYUV, AV_CODEC_ID_PGMYUV },
+    { VLC_CODEC_PAM, AV_CODEC_ID_PAM },
+    { VLC_CODEC_FFVHUFF, AV_CODEC_ID_FFVHUFF },
+    { VLC_CODEC_RV30, AV_CODEC_ID_RV30 },
+    { VLC_CODEC_RV40, AV_CODEC_ID_RV40 },
+    { VLC_CODEC_VC1,  AV_CODEC_ID_VC1 },
+    { VLC_CODEC_WMVA, AV_CODEC_ID_VC1 },
+    { VLC_CODEC_WMV3, AV_CODEC_ID_WMV3 },
+    { VLC_CODEC_WMVP, AV_CODEC_ID_WMV3 },
+    { VLC_CODEC_LOCO, AV_CODEC_ID_LOCO },
+    { VLC_CODEC_WNV1, AV_CODEC_ID_WNV1 },
+    { VLC_CODEC_AASC, AV_CODEC_ID_AASC },
+    { VLC_CODEC_INDEO2, AV_CODEC_ID_INDEO2 },
+    { VLC_CODEC_FRAPS, AV_CODEC_ID_FRAPS },
+    { VLC_CODEC_TRUEMOTION2, AV_CODEC_ID_TRUEMOTION2 },
+    { VLC_CODEC_BMP, AV_CODEC_ID_BMP },
+    { VLC_CODEC_CSCD, AV_CODEC_ID_CSCD },
+    { VLC_CODEC_MMVIDEO, AV_CODEC_ID_MMVIDEO },
+    { VLC_CODEC_ZMBV, AV_CODEC_ID_ZMBV },
+    { VLC_CODEC_AVS, AV_CODEC_ID_AVS },
+    { VLC_CODEC_SMACKVIDEO, AV_CODEC_ID_SMACKVIDEO },
+    { VLC_CODEC_NUV, AV_CODEC_ID_NUV },
+    { VLC_CODEC_KMVC, AV_CODEC_ID_KMVC },
+    { VLC_CODEC_FLASHSV, AV_CODEC_ID_FLASHSV },
+    { VLC_CODEC_CAVS, AV_CODEC_ID_CAVS },
+    { VLC_CODEC_JPEG2000, AV_CODEC_ID_JPEG2000 },
+    { VLC_CODEC_VMNC, AV_CODEC_ID_VMNC },
+    { VLC_CODEC_VP5, AV_CODEC_ID_VP5 },
+    { VLC_CODEC_VP6, AV_CODEC_ID_VP6 },
+    { VLC_CODEC_VP6F, AV_CODEC_ID_VP6F },
+    { VLC_CODEC_TARGA, AV_CODEC_ID_TARGA },
+    { VLC_CODEC_DSICINVIDEO, AV_CODEC_ID_DSICINVIDEO },
+    { VLC_CODEC_TIERTEXSEQVIDEO, AV_CODEC_ID_TIERTEXSEQVIDEO },
+    { VLC_CODEC_TIFF, AV_CODEC_ID_TIFF },
+    { VLC_CODEC_GIF, AV_CODEC_ID_GIF },
+    { VLC_CODEC_DXA, AV_CODEC_ID_DXA },
+    { VLC_CODEC_DNXHD, AV_CODEC_ID_DNXHD },
+    { VLC_CODEC_THP, AV_CODEC_ID_THP },
+    { VLC_CODEC_SGI, AV_CODEC_ID_SGI },
+    { VLC_CODEC_C93, AV_CODEC_ID_C93 },
+    { VLC_CODEC_BETHSOFTVID, AV_CODEC_ID_BETHSOFTVID },
+    /* AV_CODEC_ID_PTX */
+    { VLC_CODEC_TXD, AV_CODEC_ID_TXD },
+    { VLC_CODEC_VP6A, AV_CODEC_ID_VP6A },
+    { VLC_CODEC_AMV, AV_CODEC_ID_AMV },
+    { VLC_CODEC_VB, AV_CODEC_ID_VB },
+    { VLC_CODEC_PCX, AV_CODEC_ID_PCX },
+    /* AV_CODEC_ID_SUNRAST */
+    { VLC_CODEC_INDEO4, AV_CODEC_ID_INDEO4 },
+    { VLC_CODEC_INDEO5, AV_CODEC_ID_INDEO5 },
+    { VLC_CODEC_MIMIC, AV_CODEC_ID_MIMIC },
+    { VLC_CODEC_RL2, AV_CODEC_ID_RL2 },
+    { VLC_CODEC_ESCAPE124, AV_CODEC_ID_ESCAPE124 },
+    { VLC_CODEC_DIRAC, AV_CODEC_ID_DIRAC },
+    { VLC_CODEC_BFI, AV_CODEC_ID_BFI },
+    { VLC_CODEC_CMV, AV_CODEC_ID_CMV },
+    { VLC_CODEC_MOTIONPIXELS, AV_CODEC_ID_MOTIONPIXELS },
+    { VLC_CODEC_TGV, AV_CODEC_ID_TGV },
+    { VLC_CODEC_TGQ, AV_CODEC_ID_TGQ },
+    { VLC_CODEC_TQI, AV_CODEC_ID_TQI },
+    { VLC_CODEC_AURA, AV_CODEC_ID_AURA },
+    /* AV_CODEC_ID_AURA2 */
+    /* AV_CODEC_ID_V210X */
+    { VLC_CODEC_TMV, AV_CODEC_ID_TMV },
+    { VLC_CODEC_V210, AV_CODEC_ID_V210 },
+#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT( 54, 50, 100 ) && LIBAVCODEC_VERSION_MICRO >= 100
+    { VLC_CODEC_VUYA, AV_CODEC_ID_AYUV },
+#endif
+    /* AV_CODEC_ID_DPX */
+    { VLC_CODEC_MAD, AV_CODEC_ID_MAD },
+    { VLC_CODEC_FRWU, AV_CODEC_ID_FRWU },
+    { VLC_CODEC_FLASHSV2, AV_CODEC_ID_FLASHSV2 },
+    /* AV_CODEC_ID_CDGRAPHICS */
+    /* AV_CODEC_ID_R210 */
+    { VLC_CODEC_ANM, AV_CODEC_ID_ANM },
+    { VLC_CODEC_BINKVIDEO, AV_CODEC_ID_BINKVIDEO },
+    /* AV_CODEC_ID_IFF_ILBM */
+    /* AV_CODEC_ID_IFF_BYTERUN1 */
+    { VLC_CODEC_KGV1, AV_CODEC_ID_KGV1 },
+    { VLC_CODEC_YOP, AV_CODEC_ID_YOP },
+    { VLC_CODEC_VP8, AV_CODEC_ID_VP8 },
+    /* AV_CODEC_ID_PICTOR */
+    /* AV_CODEC_ID_ANSI */
+    /* AV_CODEC_ID_A64_MULTI */
+    /* AV_CODEC_ID_A64_MULTI5 */
+    /* AV_CODEC_ID_R10K */
+    { VLC_CODEC_MXPEG, AV_CODEC_ID_MXPEG },
+    { VLC_CODEC_LAGARITH, AV_CODEC_ID_LAGARITH },
+    { VLC_CODEC_PRORES, AV_CODEC_ID_PRORES },
+    { VLC_CODEC_JV, AV_CODEC_ID_JV },
+    { VLC_CODEC_DFA, AV_CODEC_ID_DFA },
+    { VLC_CODEC_WMVP, AV_CODEC_ID_WMV3IMAGE },
+    { VLC_CODEC_WMVP2, AV_CODEC_ID_VC1IMAGE },
+    { VLC_CODEC_UTVIDEO, AV_CODEC_ID_UTVIDEO },
+    { VLC_CODEC_BMVVIDEO, AV_CODEC_ID_BMV_VIDEO },
+    { VLC_CODEC_VBLE, AV_CODEC_ID_VBLE },
+    { VLC_CODEC_DXTORY, AV_CODEC_ID_DXTORY },
+    /* AV_CODEC_ID_V410 */
+    /* AV_CODEC_ID_XWD */
+    { VLC_CODEC_CDXL, AV_CODEC_ID_CDXL },
+    /* AV_CODEC_ID_XBM */
+    /* AV_CODEC_ID_ZEROCODEC */
+    { VLC_CODEC_MSS1, AV_CODEC_ID_MSS1 },
+    { VLC_CODEC_MSA1, AV_CODEC_ID_MSA1 },
+    { VLC_CODEC_TSC2, AV_CODEC_ID_TSCC2 },
+    { VLC_CODEC_MTS2, AV_CODEC_ID_MTS2 },
+    { VLC_CODEC_CLLC, AV_CODEC_ID_CLLC },
+    { VLC_CODEC_MSS2, AV_CODEC_ID_MSS2 },
+    { VLC_CODEC_VP9, AV_CODEC_ID_VP9 },
+#if LIBAVCODEC_VERSION_CHECK( 57, 26, 0, 83, 101 )
+    { VLC_CODEC_AV1, AV_CODEC_ID_AV1 },
+#endif
+    { VLC_CODEC_ICOD, AV_CODEC_ID_AIC },
+    /* AV_CODEC_ID_ESCAPE130 */
+    { VLC_CODEC_G2M4, AV_CODEC_ID_G2M },
+    { VLC_CODEC_G2M2, AV_CODEC_ID_G2M },
+    { VLC_CODEC_G2M3, AV_CODEC_ID_G2M },
+    /* AV_CODEC_ID_WEBP */
+    { VLC_CODEC_HNM4_VIDEO, AV_CODEC_ID_HNM4_VIDEO },
+    { VLC_CODEC_HEVC, AV_CODEC_ID_HEVC },
+
+    { VLC_CODEC_FIC , AV_CODEC_ID_FIC },
+    /* AV_CODEC_ID_ALIAS_PIX */
+    /* AV_CODEC_ID_BRENDER_PIX */
+    /* AV_CODEC_ID_PAF_VIDEO */
+    /* AV_CODEC_ID_EXR */
+
+    { VLC_CODEC_VP7 , AV_CODEC_ID_VP7 },
+    /* AV_CODEC_ID_SANM */
+    /* AV_CODEC_ID_SGIRLE */
+    /* AV_CODEC_ID_MVC1 */
+    /* AV_CODEC_ID_MVC2 */
+    { VLC_CODEC_HQX, AV_CODEC_ID_HQX },
+
+    { VLC_CODEC_TDSC, AV_CODEC_ID_TDSC },
+
+    { VLC_CODEC_HQ_HQA, AV_CODEC_ID_HQ_HQA },
+
+    { VLC_CODEC_HAP, AV_CODEC_ID_HAP },
+    /* AV_CODEC_ID_DDS */
+
+    { VLC_CODEC_DXV, AV_CODEC_ID_DXV },
+
+    /* ffmpeg only: AV_CODEC_ID_BRENDER_PIX */
+    /* ffmpeg only: AV_CODEC_ID_Y41P */
+    /* ffmpeg only: AV_CODEC_ID_EXR */
+    /* ffmpeg only: AV_CODEC_ID_AVRP */
+    /* ffmpeg only: AV_CODEC_ID_012V */
+    /* ffmpeg only: AV_CODEC_ID_AVUI */
+    /* ffmpeg only: AV_CODEC_ID_TARGA_Y216 */
+    /* ffmpeg only: AV_CODEC_ID_V308 */
+    /* ffmpeg only: AV_CODEC_ID_V408 */
+    /* ffmpeg only: AV_CODEC_ID_YUV4 */
+    /* ffmpeg only: AV_CODEC_ID_SANM */
+    /* ffmpeg only: AV_CODEC_ID_PAF_VIDEO */
+    /* ffmpeg only: AV_CODEC_ID_AVRN */
+    /* ffmpeg only: AV_CODEC_ID_CPIA */
+    /* ffmpeg only: AV_CODEC_ID_XFACE */
+    /* ffmpeg only: AV_CODEC_ID_SGIRLE */
+    /* ffmpeg only: AV_CODEC_ID_MVC1 */
+    /* ffmpeg only: AV_CODEC_ID_MVC2 */
+    /* ffmpeg only: AV_CODEC_ID_SNOW */
+    /* ffmpeg only: AV_CODEC_ID_SMVJPEG */
+
+#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 24, 102 )
+    { VLC_CODEC_CINEFORM, AV_CODEC_ID_CFHD },
+#endif
+
+#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 70, 100 )
+    { VLC_CODEC_PIXLET, AV_CODEC_ID_PIXLET },
+#endif
+
+#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 71, 101 )
+    { VLC_CODEC_SPEEDHQ, AV_CODEC_ID_SPEEDHQ },
+#endif
+
+#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 79, 100 )
+    { VLC_CODEC_FMVC, AV_CODEC_ID_FMVC },
+#endif
+};
+
+// *** Really we should probably use GetFfmpegCodec with a pre-kludge for the bits we care about
+static bool
+ZcGetFfmpegCodec( enum es_format_category_e cat, vlc_fourcc_t i_fourcc,
+                     unsigned *pi_ffmpeg_codec, const char **ppsz_name )
+{
+    const struct vlc_avcodec_fourcc *base;
+    size_t count;
+
+    base = video_codecs;
+    count = ARRAY_SIZE(video_codecs);
+    i_fourcc = vlc_fourcc_GetCodec( cat, i_fourcc );
+
+    for( size_t i = 0; i < count; i++ )
+    {
+        if( base[i].i_fourcc == i_fourcc )
+        {
+            if( pi_ffmpeg_codec != NULL )
+                *pi_ffmpeg_codec = base[i].i_codec;
+            if( ppsz_name )
+                *ppsz_name = vlc_fourcc_GetDescription( cat, i_fourcc );
+            return true;
+        }
+    }
+    return false;
+}
+
+
+
+//============================================================================
+// Derived from codec/avcodec/avcodec.c
+
+static AVCodecContext *
+ZcFfmpeg_AllocContext( decoder_t *p_dec,
+                                     const AVCodec **restrict codecp )
+{
+    unsigned i_codec_id;
+    const char *psz_namecodec;
+    const AVCodec *p_codec = NULL;
+
+    /* *** determine codec type *** */
+    if( !ZcGetFfmpegCodec( p_dec->fmt_in.i_cat, p_dec->fmt_in.i_codec,
+                         &i_codec_id, &psz_namecodec ) )
+         return NULL;
+
+    msg_Dbg( p_dec, "using %s %s", AVPROVIDER(LIBAVCODEC), LIBAVCODEC_IDENT );
+
+    /* Initialization must be done before avcodec_find_decoder() */
+    vlc_init_avcodec(VLC_OBJECT(p_dec));
+
+    /* *** ask ffmpeg for a decoder *** */
+    char *psz_decoder = var_InheritString( p_dec, "avcodec-codec" );
+    if( psz_decoder != NULL )
+    {
+        p_codec = avcodec_find_decoder_by_name( psz_decoder );
+        if( !p_codec )
+            msg_Err( p_dec, "Decoder `%s' not found", psz_decoder );
+        else if( p_codec->id != i_codec_id )
+        {
+            msg_Err( p_dec, "Decoder `%s' can't handle %4.4s",
+                    psz_decoder, (char*)&p_dec->fmt_in.i_codec );
+            p_codec = NULL;
+        }
+        free( psz_decoder );
+    }
+    if( !p_codec )
+//        p_codec = avcodec_find_decoder( i_codec_id );
+    {
+        if( p_dec->fmt_in.i_codec != VLC_CODEC_HEVC )
+            p_codec = avcodec_find_decoder(i_codec_id);
+        else
+        {
+            psz_namecodec = rpi_is_model_pi4() ? "hevc" : "hevc_rpi";
+            msg_Info(p_dec, "Looking for HEVC decoder '%s'", psz_namecodec);
+            p_codec = avcodec_find_decoder_by_name(psz_namecodec);
+        }
+    }
+
+    if( !p_codec )
+    {
+        msg_Dbg( p_dec, "codec not found (%s)", psz_namecodec );
+        return NULL;
+    }
+
+    *codecp = p_codec;
+
+    /* *** get a p_context *** */
+    AVCodecContext *avctx = avcodec_alloc_context3(p_codec);
+    if( unlikely(avctx == NULL) )
+        return NULL;
+
+    avctx->debug = var_InheritInteger( p_dec, "avcodec-debug" );
+    avctx->opaque = p_dec;
+    return avctx;
+}
+
+/*****************************************************************************
+ * ffmpeg_OpenCodec:
+ *****************************************************************************/
+
+static int
+ZcFfmpeg_OpenCodec( decoder_t *p_dec, AVCodecContext *ctx,
+                      const AVCodec *codec )
+{
+    char *psz_opts = var_InheritString( p_dec, "avcodec-options" );
+    AVDictionary *options = NULL;
+    int ret;
+
+    if (psz_opts) {
+        vlc_av_get_options(psz_opts, &options);
+        free(psz_opts);
+    }
+
+    if (av_rpi_zc_init2(ctx, p_dec, zc_alloc_buf, zc_free_pool) != 0)
+    {
+        msg_Err(p_dec, "Failed to init AV ZC");
+        return VLC_EGENERIC;
+    }
+
+    vlc_avcodec_lock();
+    ret = avcodec_open2( ctx, codec, options ? &options : NULL );
+    vlc_avcodec_unlock();
+
+    AVDictionaryEntry *t = NULL;
+    while ((t = av_dict_get(options, "", t, AV_DICT_IGNORE_SUFFIX))) {
+        msg_Err( p_dec, "Unknown option \"%s\"", t->key );
+    }
+    av_dict_free(&options);
+
+    if( ret < 0 )
+    {
+        msg_Err( p_dec, "cannot start codec (%s)", codec->name );
+        return VLC_EGENERIC;
+    }
+
+    msg_Dbg( p_dec, "codec (%s) started", codec->name );
+    return VLC_SUCCESS;
+}
+
+//============================================================================
+// Derived from 3.0.7.1 codec/avcodec/video.c
+
+static inline void wait_mt(decoder_sys_t *sys)
+{
+#if 1
+    // As we only ever update the output in our main thread this lock is
+    // redundant
+    VLC_UNUSED(sys);
+#else
+    vlc_sem_wait(&sys->sem_mt);
+#endif
+}
+
+static inline void post_mt(decoder_sys_t *sys)
+{
+#if 1
+    // As we only ever update the output in our main thread this lock is
+    // redundant
+    VLC_UNUSED(sys);
+#else
+    vlc_sem_post(&sys->sem_mt);
+#endif
+}
+
+/*****************************************************************************
+ * Local prototypes
+ *****************************************************************************/
+static void ffmpeg_InitCodec      ( decoder_t * );
+static int  DecodeVideo( decoder_t *, block_t * );
+static void Flush( decoder_t * );
+
+static uint32_t ffmpeg_CodecTag( vlc_fourcc_t fcc )
+{
+    uint8_t *p = (uint8_t*)&fcc;
+    return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
+}
+
+/*****************************************************************************
+ * Local Functions
+ *****************************************************************************/
+
+/**
+ * Sets the decoder output format.
+ */
+static int lavc_GetVideoFormat(decoder_t *dec, video_format_t *restrict fmt,
+                               AVCodecContext *ctx, enum AVPixelFormat pix_fmt,
+                               enum AVPixelFormat sw_pix_fmt)
+{
+    int width = ctx->coded_width;
+    int height = ctx->coded_height;
+
+    video_format_Init(fmt, 0);
+
+#if 1
+    VLC_UNUSED(sw_pix_fmt);
+    if ((fmt->i_chroma = ZcFindVlcChroma(pix_fmt)) == 0)
+        return -1;
+#else
+    if (pix_fmt == sw_pix_fmt)
+    {   /* software decoding */
+        int aligns[AV_NUM_DATA_POINTERS];
+
+        if (GetVlcChroma(fmt, pix_fmt))
+            return -1;
+
+        /* The libavcodec palette can only be fetched when the first output
+         * frame is decoded. Assume that the current chroma is RGB32 while we
+         * are waiting for a valid palette. Indeed, fmt_out.video.p_palette
+         * doesn't trigger a new vout request, but a new chroma yes. */
+        if (pix_fmt == AV_PIX_FMT_PAL8 && !dec->fmt_out.video.p_palette)
+            fmt->i_chroma = VLC_CODEC_RGB32;
+
+        avcodec_align_dimensions2(ctx, &width, &height, aligns);
+    }
+    else /* hardware decoding */
+        fmt->i_chroma = vlc_va_GetChroma(pix_fmt, sw_pix_fmt);
+#endif
+
+    if( width == 0 || height == 0 || width > 8192 || height > 8192 ||
+        width < ctx->width || height < ctx->height )
+    {
+        msg_Err(dec, "Invalid frame size %dx%d vsz %dx%d",
+                     width, height, ctx->width, ctx->height );
+        return -1; /* invalid display size */
+    }
+
+    fmt->i_width = width;
+    fmt->i_height = height;
+    fmt->i_visible_width = ctx->width;
+    fmt->i_visible_height = ctx->height;
+
+    /* If an aspect-ratio was specified in the input format then force it */
+    if (dec->fmt_in.video.i_sar_num > 0 && dec->fmt_in.video.i_sar_den > 0)
+    {
+        fmt->i_sar_num = dec->fmt_in.video.i_sar_num;
+        fmt->i_sar_den = dec->fmt_in.video.i_sar_den;
+    }
+    else
+    {
+        fmt->i_sar_num = ctx->sample_aspect_ratio.num;
+        fmt->i_sar_den = ctx->sample_aspect_ratio.den;
+
+        if (fmt->i_sar_num == 0 || fmt->i_sar_den == 0)
+            fmt->i_sar_num = fmt->i_sar_den = 1;
+    }
+
+    if (dec->fmt_in.video.i_frame_rate > 0
+     && dec->fmt_in.video.i_frame_rate_base > 0)
+    {
+        fmt->i_frame_rate = dec->fmt_in.video.i_frame_rate;
+        fmt->i_frame_rate_base = dec->fmt_in.video.i_frame_rate_base;
+    }
+    else if (ctx->framerate.num > 0 && ctx->framerate.den > 0)
+    {
+        fmt->i_frame_rate = ctx->framerate.num;
+        fmt->i_frame_rate_base = ctx->framerate.den;
+# if LIBAVCODEC_VERSION_MICRO <  100
+        // for some reason libav don't thinkg framerate presents actually same thing as in ffmpeg
+        fmt->i_frame_rate_base *= __MAX(ctx->ticks_per_frame, 1);
+# endif
+    }
+    else if (ctx->time_base.num > 0 && ctx->time_base.den > 0)
+    {
+        fmt->i_frame_rate = ctx->time_base.den;
+        fmt->i_frame_rate_base = ctx->time_base.num
+                                 * __MAX(ctx->ticks_per_frame, 1);
+    }
+
+    /* FIXME we should only set the known values and let the core decide
+     * later of fallbacks, but we can't do that with a boolean */
+    switch ( ctx->color_range )
+    {
+    case AVCOL_RANGE_JPEG:
+        fmt->b_color_range_full = true;
+        break;
+    case AVCOL_RANGE_UNSPECIFIED:
+        fmt->b_color_range_full = !vlc_fourcc_IsYUV( fmt->i_chroma );
+        break;
+    case AVCOL_RANGE_MPEG:
+    default:
+        fmt->b_color_range_full = false;
+        break;
+    }
+
+    switch( ctx->colorspace )
+    {
+        case AVCOL_SPC_BT709:
+            fmt->space = COLOR_SPACE_BT709;
+            break;
+        case AVCOL_SPC_SMPTE170M:
+        case AVCOL_SPC_BT470BG:
+            fmt->space = COLOR_SPACE_BT601;
+            break;
+        case AVCOL_SPC_BT2020_NCL:
+        case AVCOL_SPC_BT2020_CL:
+            fmt->space = COLOR_SPACE_BT2020;
+            break;
+        default:
+            break;
+    }
+
+    switch( ctx->color_trc )
+    {
+        case AVCOL_TRC_LINEAR:
+            fmt->transfer = TRANSFER_FUNC_LINEAR;
+            break;
+        case AVCOL_TRC_GAMMA22:
+            fmt->transfer = TRANSFER_FUNC_SRGB;
+            break;
+        case AVCOL_TRC_BT709:
+            fmt->transfer = TRANSFER_FUNC_BT709;
+            break;
+        case AVCOL_TRC_SMPTE170M:
+        case AVCOL_TRC_BT2020_10:
+        case AVCOL_TRC_BT2020_12:
+            fmt->transfer = TRANSFER_FUNC_BT2020;
+            break;
+#if LIBAVUTIL_VERSION_CHECK( 55, 14, 0, 31, 100)
+        case AVCOL_TRC_ARIB_STD_B67:
+            fmt->transfer = TRANSFER_FUNC_ARIB_B67;
+            break;
+#endif
+#if LIBAVUTIL_VERSION_CHECK( 55, 17, 0, 37, 100)
+        case AVCOL_TRC_SMPTE2084:
+            fmt->transfer = TRANSFER_FUNC_SMPTE_ST2084;
+            break;
+        case AVCOL_TRC_SMPTE240M:
+            fmt->transfer = TRANSFER_FUNC_SMPTE_240;
+            break;
+        case AVCOL_TRC_GAMMA28:
+            fmt->transfer = TRANSFER_FUNC_BT470_BG;
+            break;
+#endif
+        default:
+            break;
+    }
+
+    switch( ctx->color_primaries )
+    {
+        case AVCOL_PRI_BT709:
+            fmt->primaries = COLOR_PRIMARIES_BT709;
+            break;
+        case AVCOL_PRI_BT470BG:
+            fmt->primaries = COLOR_PRIMARIES_BT601_625;
+            break;
+        case AVCOL_PRI_SMPTE170M:
+        case AVCOL_PRI_SMPTE240M:
+            fmt->primaries = COLOR_PRIMARIES_BT601_525;
+            break;
+        case AVCOL_PRI_BT2020:
+            fmt->primaries = COLOR_PRIMARIES_BT2020;
+            break;
+        default:
+            break;
+    }
+
+    switch( ctx->chroma_sample_location )
+    {
+        case AVCHROMA_LOC_LEFT:
+            fmt->chroma_location = CHROMA_LOCATION_LEFT;
+            break;
+        case AVCHROMA_LOC_CENTER:
+            fmt->chroma_location = CHROMA_LOCATION_CENTER;
+            break;
+        case AVCHROMA_LOC_TOPLEFT:
+            fmt->chroma_location = CHROMA_LOCATION_TOP_LEFT;
+            break;
+        default:
+            break;
+    }
+
+    return 0;
+}
+
+static int lavc_UpdateVideoFormat(decoder_t *dec, AVCodecContext *ctx,
+                                  enum AVPixelFormat fmt,
+                                  enum AVPixelFormat swfmt)
+{
+    video_format_t fmt_out;
+    int val;
+#if TRACE_ALL
+    msg_Dbg(dec, "<<< %s", __func__);
+#endif
+    val = lavc_GetVideoFormat(dec, &fmt_out, ctx, fmt, swfmt);
+    if (val)
+    {
+        msg_Dbg(dec, "Failed to get format");
+        return val;
+    }
+
+    /* always have date in fields/ticks units */
+    if(dec->p_sys->pts.i_divider_num)
+        date_Change(&dec->p_sys->pts, fmt_out.i_frame_rate *
+                                      __MAX(ctx->ticks_per_frame, 1),
+                                      fmt_out.i_frame_rate_base);
+    else
+        date_Init(&dec->p_sys->pts, fmt_out.i_frame_rate *
+                                    __MAX(ctx->ticks_per_frame, 1),
+                                    fmt_out.i_frame_rate_base);
+
+    fmt_out.p_palette = dec-> fmt_out.video.p_palette;
+    dec->fmt_out.video.p_palette = NULL;
+
+    es_format_Change(&dec->fmt_out, VIDEO_ES, fmt_out.i_chroma);
+    dec->fmt_out.video = fmt_out;
+    dec->fmt_out.video.orientation = dec->fmt_in.video.orientation;
+    dec->fmt_out.video.projection_mode = dec->fmt_in.video.projection_mode;
+    dec->fmt_out.video.multiview_mode = dec->fmt_in.video.multiview_mode;
+    dec->fmt_out.video.pose = dec->fmt_in.video.pose;
+    if ( dec->fmt_in.video.mastering.max_luminance )
+        dec->fmt_out.video.mastering = dec->fmt_in.video.mastering;
+    dec->fmt_out.video.lighting = dec->fmt_in.video.lighting;
+
+    val = decoder_UpdateVideoFormat(dec);
+#if TRACE_ALL
+    msg_Dbg(dec, ">>> %s: rv=%d", __func__, val);
+#endif
+    return val;
+}
+
+static int OpenVideoCodec( decoder_t *p_dec )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    AVCodecContext *ctx = p_sys->p_context;
+    const AVCodec *codec = p_sys->p_codec;
+    int ret;
+
+    if( ctx->extradata_size <= 0 )
+    {
+        if( codec->id == AV_CODEC_ID_VC1 ||
+            codec->id == AV_CODEC_ID_THEORA )
+        {
+            msg_Warn( p_dec, "waiting for extra data for codec %s",
+                      codec->name );
+            return 1;
+        }
+    }
+
+    ctx->width  = p_dec->fmt_in.video.i_visible_width;
+    ctx->height = p_dec->fmt_in.video.i_visible_height;
+
+    ctx->coded_width = p_dec->fmt_in.video.i_width;
+    ctx->coded_height = p_dec->fmt_in.video.i_height;
+
+    ctx->bits_per_coded_sample = p_dec->fmt_in.video.i_bits_per_pixel;
+    p_sys->pix_fmt = AV_PIX_FMT_NONE;
+    p_sys->profile = -1;
+    p_sys->level = -1;
+    cc_Init( &p_sys->cc );
+
+    set_video_color_settings( &p_dec->fmt_in.video, ctx );
+    if( p_dec->fmt_in.video.i_frame_rate_base &&
+        p_dec->fmt_in.video.i_frame_rate &&
+        (double) p_dec->fmt_in.video.i_frame_rate /
+                 p_dec->fmt_in.video.i_frame_rate_base < 6 )
+    {
+        ctx->flags |= AV_CODEC_FLAG_LOW_DELAY;
+    }
+
+    post_mt( p_sys );
+    ret = ZcFfmpeg_OpenCodec( p_dec, ctx, codec );
+    wait_mt( p_sys );
+    if( ret < 0 )
+        return ret;
+
+    switch( ctx->active_thread_type )
+    {
+        case FF_THREAD_FRAME:
+            msg_Dbg( p_dec, "using frame thread mode with %d threads",
+                     ctx->thread_count );
+            break;
+        case FF_THREAD_SLICE:
+            msg_Dbg( p_dec, "using slice thread mode with %d threads",
+                     ctx->thread_count );
+            break;
+        case 0:
+            if( ctx->thread_count > 1 )
+                msg_Warn( p_dec, "failed to enable threaded decoding" );
+            break;
+        default:
+            msg_Warn( p_dec, "using unknown thread mode with %d threads",
+                      ctx->thread_count );
+            break;
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ * InitVideo: initialize the video decoder
+ *****************************************************************************
+ * the ffmpeg codec will be opened, some memory allocated. The vout is not yet
+ * opened (done after the first decoded frame).
+ *****************************************************************************/
+static int MmalAvcodecOpenDecoder( vlc_object_t *obj )
+{
+    decoder_t *p_dec = (decoder_t *)obj;
+    const AVCodec *p_codec;
+
+    int extra_buffers = var_InheritInteger(p_dec, MMAL_AVCODEC_BUFFERS);
+
+    if (extra_buffers < 0)
+    {
+        extra_buffers = p_dec->fmt_in.video.i_height * p_dec->fmt_in.video.i_width >= 1920 * 1088 ?
+            BUFFERS_IN_FLIGHT_UHD : BUFFERS_IN_FLIGHT;
+    }
+
+    if (extra_buffers <= 0)
+    {
+        msg_Dbg(p_dec, "%s: extra_buffers=%d - cannot use module", __func__, extra_buffers);
+        return VLC_EGENERIC;
+    }
+
+    const vcsm_init_type_t vcsm_type = cma_vcsm_init();
+    const int vcsm_size =
+        vcsm_type == VCSM_INIT_LEGACY ? hw_mmal_get_gpu_mem() : 512 << 20;
+
+#if 1
+    {
+        char buf1[5], buf2[5], buf2a[5];
+        char buf3[5], buf4[5];
+        uint32_t in_fcc = 0;
+        msg_Dbg(p_dec, "%s: <<< (%s/%s)[%s] %dx%d -> (%s/%s) %dx%d [%s/%d] xb:%d", __func__,
+                str_fourcc(buf1, p_dec->fmt_in.i_codec),
+                str_fourcc(buf2, p_dec->fmt_in.video.i_chroma),
+                str_fourcc(buf2a, in_fcc),
+                p_dec->fmt_in.video.i_width, p_dec->fmt_in.video.i_height,
+                str_fourcc(buf3, p_dec->fmt_out.i_codec),
+                str_fourcc(buf4, p_dec->fmt_out.video.i_chroma),
+                p_dec->fmt_out.video.i_width, p_dec->fmt_out.video.i_height,
+                cma_vcsm_init_str(vcsm_type), vcsm_size, extra_buffers);
+    }
+#endif
+
+    if( vcsm_type == VCSM_INIT_NONE )
+        return VLC_EGENERIC;
+#if 1
+    if( (p_dec->fmt_in.i_codec != VLC_CODEC_HEVC &&
+         (vcsm_type == VCSM_INIT_CMA || vcsm_size < (96 << 20))) ||
+        (p_dec->fmt_in.i_codec == VLC_CODEC_HEVC &&
+         vcsm_size < (128 << 20)))
+    {
+        cma_vcsm_exit(vcsm_type);
+        return VLC_EGENERIC;
+    }
+#endif
+
+    AVCodecContext *p_context = ZcFfmpeg_AllocContext( p_dec, &p_codec );
+    if( p_context == NULL )
+    {
+        cma_vcsm_exit(vcsm_type);
+        return VLC_EGENERIC;
+    }
+
+    int i_val;
+
+    /* Allocate the memory needed to store the decoder's structure */
+    decoder_sys_t *p_sys = calloc( 1, sizeof(*p_sys) );
+    if( unlikely(p_sys == NULL) )
+    {
+        avcodec_free_context( &p_context );
+        cma_vcsm_exit(vcsm_type);
+        return VLC_ENOMEM;
+    }
+
+    p_dec->p_sys = p_sys;
+    p_sys->p_context = p_context;
+    p_sys->p_codec = p_codec;
+    p_sys->p_dec = p_dec;
+//    p_sys->p_va = NULL;
+    p_sys->cma_in_flight_max = extra_buffers;
+    p_sys->vcsm_init_type = vcsm_type;
+    vlc_sem_init( &p_sys->sem_mt, 0 );
+
+    /* ***** Fill p_context with init values ***** */
+    p_context->codec_tag = ffmpeg_CodecTag( p_dec->fmt_in.i_original_fourcc ?
+                                p_dec->fmt_in.i_original_fourcc : p_dec->fmt_in.i_codec );
+
+    /*  ***** Get configuration of ffmpeg plugin ***** */
+    p_context->workaround_bugs =
+        var_InheritInteger( p_dec, "avcodec-workaround-bugs" );
+    p_context->err_recognition =
+        var_InheritInteger( p_dec, "avcodec-error-resilience" );
+
+    if( var_CreateGetBool( p_dec, "grayscale" ) )
+        p_context->flags |= AV_CODEC_FLAG_GRAY;
+
+    /* ***** Output always the frames ***** */
+    p_context->flags |= AV_CODEC_FLAG_OUTPUT_CORRUPT;
+
+    i_val = var_CreateGetInteger( p_dec, "avcodec-skiploopfilter" );
+    if( i_val >= 4 ) p_context->skip_loop_filter = AVDISCARD_ALL;
+    else if( i_val == 3 ) p_context->skip_loop_filter = AVDISCARD_NONKEY;
+    else if( i_val == 2 ) p_context->skip_loop_filter = AVDISCARD_BIDIR;
+    else if( i_val == 1 ) p_context->skip_loop_filter = AVDISCARD_NONREF;
+    else p_context->skip_loop_filter = AVDISCARD_DEFAULT;
+
+    if( var_CreateGetBool( p_dec, "avcodec-fast" ) )
+        p_context->flags2 |= AV_CODEC_FLAG2_FAST;
+
+    /* ***** libavcodec frame skipping ***** */
+    p_sys->b_hurry_up = var_CreateGetBool( p_dec, "avcodec-hurry-up" );
+    p_sys->b_show_corrupted = var_CreateGetBool( p_dec, "avcodec-corrupted" );
+
+    i_val = var_CreateGetInteger( p_dec, "avcodec-skip-frame" );
+    if( i_val >= 4 ) p_sys->i_skip_frame = AVDISCARD_ALL;
+    else if( i_val == 3 ) p_sys->i_skip_frame = AVDISCARD_NONKEY;
+    else if( i_val == 2 ) p_sys->i_skip_frame = AVDISCARD_BIDIR;
+    else if( i_val == 1 ) p_sys->i_skip_frame = AVDISCARD_NONREF;
+    else if( i_val == -1 ) p_sys->i_skip_frame = AVDISCARD_NONE;
+    else p_sys->i_skip_frame = AVDISCARD_DEFAULT;
+    p_context->skip_frame = p_sys->i_skip_frame;
+
+    i_val = var_CreateGetInteger( p_dec, "avcodec-skip-idct" );
+    if( i_val >= 4 ) p_context->skip_idct = AVDISCARD_ALL;
+    else if( i_val == 3 ) p_context->skip_idct = AVDISCARD_NONKEY;
+    else if( i_val == 2 ) p_context->skip_idct = AVDISCARD_BIDIR;
+    else if( i_val == 1 ) p_context->skip_idct = AVDISCARD_NONREF;
+    else if( i_val == -1 ) p_context->skip_idct = AVDISCARD_NONE;
+    else p_context->skip_idct = AVDISCARD_DEFAULT;
+
+    /* ***** libavcodec direct rendering ***** */
+    p_sys->b_direct_rendering = false;
+    atomic_init(&p_sys->b_dr_failure, false);
+    if( var_CreateGetBool( p_dec, "avcodec-dr" ) &&
+       (p_codec->capabilities & AV_CODEC_CAP_DR1) &&
+        /* No idea why ... but this fixes flickering on some TSCC streams */
+        p_sys->p_codec->id != AV_CODEC_ID_TSCC &&
+        p_sys->p_codec->id != AV_CODEC_ID_CSCD &&
+        p_sys->p_codec->id != AV_CODEC_ID_CINEPAK )
+    {
+        /* Some codecs set pix_fmt only after the 1st frame has been decoded,
+         * so we need to do another check in ffmpeg_GetFrameBuf() */
+        p_sys->b_direct_rendering = true;
+    }
+
+    p_context->get_format = ZcGetFormat;
+#if 0
+    p_context->get_format = ffmpeg_GetFormat;
+    /* Always use our get_buffer wrapper so we can calculate the
+     * PTS correctly */
+    p_context->get_buffer2 = lavc_GetFrame;
+    p_context->opaque = p_dec;
+#endif
+
+    int i_thread_count = var_InheritInteger( p_dec, "avcodec-threads" );
+    if( i_thread_count <= 0 )
+#if 1
+    {
+        // Pick 5 threads for everything on Pi except for HEVC where the h/w
+        // really limits the useful size to 3
+        i_thread_count = p_codec->id == AV_CODEC_ID_HEVC ? 3 : 5;
+    }
+#else
+    {
+        i_thread_count = vlc_GetCPUCount();
+        if( i_thread_count > 1 )
+            i_thread_count++;
+
+        //FIXME: take in count the decoding time
+#if VLC_WINSTORE_APP
+        i_thread_count = __MIN( i_thread_count, 6 );
+#else
+        i_thread_count = __MIN( i_thread_count, p_codec->id == AV_CODEC_ID_HEVC ? 10 : 6 );
+#endif
+    }
+    i_thread_count = __MIN( i_thread_count, p_codec->id == AV_CODEC_ID_HEVC ? 32 : 16 );
+#endif
+    msg_Dbg( p_dec, "allowing %d thread(s) for decoding", i_thread_count );
+    p_context->thread_count = i_thread_count;
+    p_context->thread_safe_callbacks = true;
+
+    switch( p_codec->id )
+    {
+        case AV_CODEC_ID_MPEG4:
+        case AV_CODEC_ID_H263:
+            p_context->thread_type = 0;
+            break;
+        case AV_CODEC_ID_MPEG1VIDEO:
+        case AV_CODEC_ID_MPEG2VIDEO:
+            p_context->thread_type &= ~FF_THREAD_SLICE;
+            /* fall through */
+# if (LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55, 1, 0))
+        case AV_CODEC_ID_H264:
+        case AV_CODEC_ID_VC1:
+        case AV_CODEC_ID_WMV3:
+            p_context->thread_type &= ~FF_THREAD_FRAME;
+# endif
+        default:
+            break;
+    }
+
+    if( p_context->thread_type & FF_THREAD_FRAME )
+        p_dec->i_extra_picture_buffers = 2 * p_context->thread_count;
+
+    /* ***** misc init ***** */
+    date_Init(&p_sys->pts, 1, 30001);
+    date_Set(&p_sys->pts, VLC_TS_INVALID);
+    p_sys->b_first_frame = true;
+    p_sys->i_late_frames = 0;
+    p_sys->b_from_preroll = false;
+
+    /* Set output properties */
+    if( ZcGetVlcChroma( &p_dec->fmt_out.video, p_context->pix_fmt ) != VLC_SUCCESS )
+    {
+        /* we are doomed. but not really, because most codecs set their pix_fmt later on */
+//        p_dec->fmt_out.i_codec = VLC_CODEC_I420;
+        p_dec->fmt_out.i_codec = VLC_CODEC_MMAL_ZC_I420;
+    }
+    p_dec->fmt_out.i_codec = p_dec->fmt_out.video.i_chroma;
+
+    p_dec->fmt_out.video.orientation = p_dec->fmt_in.video.orientation;
+
+    if( p_dec->fmt_in.video.p_palette ) {
+        p_sys->palette_sent = false;
+        p_dec->fmt_out.video.p_palette = malloc( sizeof(video_palette_t) );
+        if( p_dec->fmt_out.video.p_palette )
+            *p_dec->fmt_out.video.p_palette = *p_dec->fmt_in.video.p_palette;
+    } else
+        p_sys->palette_sent = true;
+
+    if ((p_sys->cma_pool = cma_buf_pool_new(p_sys->cma_in_flight_max, p_sys->cma_in_flight_max, false, "mmal_avcodec")) == NULL)
+    {
+        msg_Err(p_dec, "CMA pool alloc failure");
+        goto fail;
+    }
+
+    /* ***** init this codec with special data ***** */
+    ffmpeg_InitCodec( p_dec );
+
+    /* ***** Open the codec ***** */
+    if( OpenVideoCodec( p_dec ) < 0 )
+    {
+        vlc_sem_destroy( &p_sys->sem_mt );
+        free( p_sys );
+        avcodec_free_context( &p_context );
+        return VLC_EGENERIC;
+    }
+
+    p_dec->pf_decode = DecodeVideo;
+    p_dec->pf_flush  = Flush;
+
+    /* XXX: Writing input format makes little sense. */
+    if( p_context->profile != FF_PROFILE_UNKNOWN )
+        p_dec->fmt_in.i_profile = p_context->profile;
+    if( p_context->level != FF_LEVEL_UNKNOWN )
+        p_dec->fmt_in.i_level = p_context->level;
+
+#if 1
+    // Most of the time we have nothing useful by way of a format here
+    // wait till we've decoded something
+#else
+    // Update output format
+    if (lavc_UpdateVideoFormat(p_dec, p_context, p_context->pix_fmt,
+                               p_context->pix_fmt) != 0)
+    {
+        msg_Err(p_dec, "Unable to update format: pix_fmt=%d", p_context->pix_fmt);
+//        goto fail;
+    }
+#endif
+
+#if TRACE_ALL
+    msg_Dbg(p_dec, "<<< %s: OK", __func__);
+#endif
+    return VLC_SUCCESS;
+
+fail:
+    MmalAvcodecCloseDecoder(VLC_OBJECT(p_dec));
+
+#if TRACE_ALL
+    msg_Dbg(p_dec, "<<< %s: FAIL", __func__);
+#endif
+
+    return VLC_EGENERIC;
+}
+
+/*****************************************************************************
+ * Flush:
+ *****************************************************************************/
+static void Flush( decoder_t *p_dec )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    AVCodecContext *p_context = p_sys->p_context;
+
+#if TRACE_ALL
+    msg_Dbg(p_dec, "<<< %s", __func__);
+#endif
+
+    date_Set(&p_sys->pts, VLC_TS_INVALID); /* To make sure we recover properly */
+    p_sys->i_late_frames = 0;
+    cc_Flush( &p_sys->cc );
+
+    /* Abort pictures in order to unblock all avcodec workers threads waiting
+     * for a picture. This will avoid a deadlock between avcodec_flush_buffers
+     * and workers threads */
+// It would probably be good to use AbortPicture but that often deadlocks on close
+// and given that we wait for pics in the main thread it should be unneeded (whereas
+// cma is alloced in the depths of ffmpeg on its own threads)
+//    decoder_AbortPictures( p_dec, true );
+    cma_buf_pool_cancel(p_sys->cma_pool);
+
+    post_mt( p_sys );
+    /* do not flush buffers if codec hasn't been opened (theora/vorbis/VC1) */
+    if( avcodec_is_open( p_context ) )
+        avcodec_flush_buffers( p_context );
+    wait_mt( p_sys );
+
+    /* Reset cancel state to false */
+    cma_buf_pool_uncancel(p_sys->cma_pool);
+//    decoder_AbortPictures( p_dec, false );
+
+#if TRACE_ALL
+    msg_Dbg(p_dec, ">>> %s", __func__);
+#endif
+
+}
+
+static bool check_block_validity( decoder_sys_t *p_sys, block_t *block )
+{
+    if( !block)
+        return true;
+
+    if( block->i_flags & (BLOCK_FLAG_DISCONTINUITY|BLOCK_FLAG_CORRUPTED) )
+    {
+        date_Set( &p_sys->pts, VLC_TS_INVALID ); /* To make sure we recover properly */
+        cc_Flush( &p_sys->cc );
+
+        p_sys->i_late_frames = 0;
+        if( block->i_flags & BLOCK_FLAG_CORRUPTED )
+        {
+            block_Release( block );
+            return false;
+        }
+    }
+    return true;
+}
+
+static bool check_block_being_late( decoder_sys_t *p_sys, block_t *block, mtime_t current_time)
+{
+    if( !block )
+        return false;
+    if( block->i_flags & BLOCK_FLAG_PREROLL )
+    {
+        /* Do not care about late frames when prerolling
+         * TODO avoid decoding of non reference frame
+         * (ie all B except for H264 where it depends only on nal_ref_idc) */
+        p_sys->i_late_frames = 0;
+        p_sys->b_from_preroll = true;
+        p_sys->i_last_late_delay = INT64_MAX;
+    }
+
+    if( p_sys->i_late_frames <= 0 )
+        return false;
+
+    if( current_time - p_sys->i_late_frames_start > (5*CLOCK_FREQ))
+    {
+        date_Set( &p_sys->pts, VLC_TS_INVALID ); /* To make sure we recover properly */
+        block_Release( block );
+        p_sys->i_late_frames--;
+        return true;
+    }
+    return false;
+}
+
+static bool check_frame_should_be_dropped( decoder_sys_t *p_sys, AVCodecContext *p_context, bool *b_need_output_picture )
+{
+    if( p_sys->i_late_frames <= 4)
+        return false;
+
+    *b_need_output_picture = false;
+    if( p_sys->i_late_frames < 12 )
+    {
+        p_context->skip_frame =
+                (p_sys->i_skip_frame <= AVDISCARD_NONREF) ?
+                AVDISCARD_NONREF : p_sys->i_skip_frame;
+    }
+    else
+    {
+        /* picture too late, won't decode
+         * but break picture until a new I, and for mpeg4 ...*/
+        p_sys->i_late_frames--; /* needed else it will never be decrease */
+        return true;
+    }
+    return false;
+}
+
+static mtime_t interpolate_next_pts( decoder_t *p_dec, AVFrame *frame )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    AVCodecContext *p_context = p_sys->p_context;
+
+    if( date_Get( &p_sys->pts ) == VLC_TS_INVALID ||
+        p_sys->pts.i_divider_num == 0 )
+        return VLC_TS_INVALID;
+
+    int i_tick = p_context->ticks_per_frame;
+    if( i_tick <= 0 )
+        i_tick = 1;
+
+    /* interpolate the next PTS */
+    return date_Increment( &p_sys->pts, i_tick + frame->repeat_pict );
+}
+
+static void update_late_frame_count( decoder_t *p_dec, block_t *p_block,
+                                     mtime_t current_time, mtime_t i_pts,
+                                     mtime_t i_next_pts )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+   /* Update frame late count (except when doing preroll) */
+   mtime_t i_display_date = VLC_TS_INVALID;
+   if( !p_block || !(p_block->i_flags & BLOCK_FLAG_PREROLL) )
+       i_display_date = decoder_GetDisplayDate( p_dec, i_pts );
+
+   mtime_t i_threshold = i_next_pts != VLC_TS_INVALID ? (i_next_pts - i_pts) / 2 : 20000;
+
+   if( i_display_date > VLC_TS_INVALID && i_display_date + i_threshold <= current_time )
+   {
+       /* Out of preroll, consider only late frames on rising delay */
+       if( p_sys->b_from_preroll )
+       {
+           if( p_sys->i_last_late_delay > current_time - i_display_date )
+           {
+               p_sys->i_last_late_delay = current_time - i_display_date;
+               return;
+           }
+           p_sys->b_from_preroll = false;
+       }
+
+       p_sys->i_late_frames++;
+       if( p_sys->i_late_frames == 1 )
+           p_sys->i_late_frames_start = current_time;
+
+   }
+   else
+   {
+       p_sys->i_late_frames = 0;
+   }
+}
+
+
+static int DecodeSidedata( decoder_t *p_dec, const AVFrame *frame, picture_t *p_pic )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    bool format_changed = false;
+
+#if (LIBAVUTIL_VERSION_MICRO >= 100 && LIBAVUTIL_VERSION_INT >= AV_VERSION_INT( 55, 16, 101 ) )
+#define FROM_AVRAT(default_factor, avrat) \
+(uint64_t)(default_factor) * (avrat).num / (avrat).den
+    const AVFrameSideData *metadata =
+            av_frame_get_side_data( frame,
+                                    AV_FRAME_DATA_MASTERING_DISPLAY_METADATA );
+    if ( metadata )
+    {
+        const AVMasteringDisplayMetadata *hdr_meta =
+                (const AVMasteringDisplayMetadata *) metadata->data;
+        if ( hdr_meta->has_luminance )
+        {
+#define ST2086_LUMA_FACTOR 10000
+            p_pic->format.mastering.max_luminance =
+                    FROM_AVRAT(ST2086_LUMA_FACTOR, hdr_meta->max_luminance);
+            p_pic->format.mastering.min_luminance =
+                    FROM_AVRAT(ST2086_LUMA_FACTOR, hdr_meta->min_luminance);
+        }
+        if ( hdr_meta->has_primaries )
+        {
+#define ST2086_RED   2
+#define ST2086_GREEN 0
+#define ST2086_BLUE  1
+#define LAV_RED    0
+#define LAV_GREEN  1
+#define LAV_BLUE   2
+#define ST2086_PRIM_FACTOR 50000
+            p_pic->format.mastering.primaries[ST2086_RED*2   + 0] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_RED][0]);
+            p_pic->format.mastering.primaries[ST2086_RED*2   + 1] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_RED][1]);
+            p_pic->format.mastering.primaries[ST2086_GREEN*2 + 0] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_GREEN][0]);
+            p_pic->format.mastering.primaries[ST2086_GREEN*2 + 1] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_GREEN][1]);
+            p_pic->format.mastering.primaries[ST2086_BLUE*2  + 0] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_BLUE][0]);
+            p_pic->format.mastering.primaries[ST2086_BLUE*2  + 1] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_BLUE][1]);
+            p_pic->format.mastering.white_point[0] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->white_point[0]);
+            p_pic->format.mastering.white_point[1] =
+                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->white_point[1]);
+        }
+
+        if ( memcmp( &p_dec->fmt_out.video.mastering,
+                     &p_pic->format.mastering,
+                     sizeof(p_pic->format.mastering) ) )
+        {
+            p_dec->fmt_out.video.mastering = p_pic->format.mastering;
+            format_changed = true;
+        }
+#undef FROM_AVRAT
+    }
+#endif
+#if (LIBAVUTIL_VERSION_MICRO >= 100 && LIBAVUTIL_VERSION_INT >= AV_VERSION_INT( 55, 60, 100 ) )
+    const AVFrameSideData *metadata_lt =
+            av_frame_get_side_data( frame,
+                                    AV_FRAME_DATA_CONTENT_LIGHT_LEVEL );
+    if ( metadata_lt )
+    {
+        const AVContentLightMetadata *light_meta =
+                (const AVContentLightMetadata *) metadata_lt->data;
+        p_pic->format.lighting.MaxCLL = light_meta->MaxCLL;
+        p_pic->format.lighting.MaxFALL = light_meta->MaxFALL;
+        if ( memcmp( &p_dec->fmt_out.video.lighting,
+                     &p_pic->format.lighting,
+                     sizeof(p_pic->format.lighting) ) )
+        {
+            p_dec->fmt_out.video.lighting  = p_pic->format.lighting;
+            format_changed = true;
+        }
+    }
+#endif
+
+    if (format_changed && decoder_UpdateVideoFormat( p_dec ))
+        return -1;
+
+    const AVFrameSideData *p_avcc = av_frame_get_side_data( frame, AV_FRAME_DATA_A53_CC );
+    if( p_avcc )
+    {
+        cc_Extract( &p_sys->cc, CC_PAYLOAD_RAW, true, p_avcc->data, p_avcc->size );
+        if( p_sys->cc.b_reorder || p_sys->cc.i_data )
+        {
+            block_t *p_cc = block_Alloc( p_sys->cc.i_data );
+            if( p_cc )
+            {
+                memcpy( p_cc->p_buffer, p_sys->cc.p_data, p_sys->cc.i_data );
+                if( p_sys->cc.b_reorder )
+                    p_cc->i_dts = p_cc->i_pts = p_pic->date;
+                else
+                    p_cc->i_pts = p_cc->i_dts;
+                decoder_cc_desc_t desc;
+                desc.i_608_channels = p_sys->cc.i_608channels;
+                desc.i_708_channels = p_sys->cc.i_708channels;
+                desc.i_reorder_depth = 4;
+                decoder_QueueCc( p_dec, p_cc, &desc );
+            }
+            cc_Flush( &p_sys->cc );
+        }
+    }
+    return 0;
+}
+
+/*****************************************************************************
+ * DecodeBlock: Called to decode one or more frames
+ *****************************************************************************/
+
+static picture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block, bool *error )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    AVCodecContext *p_context = p_sys->p_context;
+    /* Boolean if we assume that we should get valid pic as result */
+    bool b_need_output_picture = true;
+
+    /* Boolean for END_OF_SEQUENCE */
+    bool eos_spotted = false;
+
+#if TRACE_ALL
+    msg_Dbg(p_dec, "<<< %s: (buf_size=%d)", __func__, pp_block == NULL || *pp_block == NULL ? 0 : (*pp_block)->i_buffer);
+#endif
+
+    block_t *p_block;
+    mtime_t current_time;
+    picture_t *p_pic = NULL;
+    AVFrame *frame = NULL;
+
+    // By default we are OK
+    *error = false;
+
+    if( !p_context->extradata_size && p_dec->fmt_in.i_extra )
+    {
+        ffmpeg_InitCodec( p_dec );
+        if( !avcodec_is_open( p_context ) )
+            OpenVideoCodec( p_dec );
+    }
+
+    p_block = pp_block ? *pp_block : NULL;
+    if(!p_block && !(p_sys->p_codec->capabilities & AV_CODEC_CAP_DELAY) )
+        return NULL;
+
+    if( !avcodec_is_open( p_context ) )
+    {
+        if( p_block )
+            block_Release( p_block );
+        return NULL;
+    }
+
+    if( !check_block_validity( p_sys, p_block ) )
+        return NULL;
+
+    current_time = mdate();
+    if( p_dec->b_frame_drop_allowed &&  check_block_being_late( p_sys, p_block, current_time) )
+    {
+        msg_Err( p_dec, "more than 5 seconds of late video -> "
+                 "dropping frame (computer too slow ?)" );
+        return NULL;
+    }
+
+
+    /* A good idea could be to decode all I pictures and see for the other */
+
+    /* Defaults that if we aren't in prerolling, we want output picture
+       same for if we are flushing (p_block==NULL) */
+    if( !p_block || !(p_block->i_flags & BLOCK_FLAG_PREROLL) )
+        b_need_output_picture = true;
+    else
+        b_need_output_picture = false;
+
+    /* Change skip_frame config only if hurry_up is enabled */
+    if( p_sys->b_hurry_up )
+    {
+        p_context->skip_frame = p_sys->i_skip_frame;
+
+        /* Check also if we should/can drop the block and move to next block
+            as trying to catchup the speed*/
+        if( p_dec->b_frame_drop_allowed &&
+            check_frame_should_be_dropped( p_sys, p_context, &b_need_output_picture ) )
+        {
+            if( p_block )
+                block_Release( p_block );
+            msg_Warn( p_dec, "More than 11 late frames, dropping frame" );
+            return NULL;
+        }
+    }
+    if( !b_need_output_picture )
+    {
+        p_context->skip_frame = __MAX( p_context->skip_frame,
+                                              AVDISCARD_NONREF );
+    }
+
+    /*
+     * Do the actual decoding now */
+
+    /* Don't forget that libavcodec requires a little more bytes
+     * that the real frame size */
+    if( p_block && p_block->i_buffer > 0 )
+    {
+        eos_spotted = ( p_block->i_flags & BLOCK_FLAG_END_OF_SEQUENCE ) != 0;
+
+        p_block = block_Realloc( p_block, 0,
+                            p_block->i_buffer + FF_INPUT_BUFFER_PADDING_SIZE );
+        if( !p_block )
+            return NULL;
+        p_block->i_buffer -= FF_INPUT_BUFFER_PADDING_SIZE;
+        *pp_block = p_block;
+        memset( p_block->p_buffer + p_block->i_buffer, 0,
+                FF_INPUT_BUFFER_PADDING_SIZE );
+    }
+
+    while( !p_block || p_block->i_buffer > 0 || eos_spotted )
+    {
+        int i_used;
+        AVPacket pkt;
+
+        post_mt( p_sys );
+
+        av_init_packet( &pkt );
+        if( p_block && p_block->i_buffer > 0 )
+        {
+            pkt.data = p_block->p_buffer;
+            pkt.size = p_block->i_buffer;
+            pkt.pts = p_block->i_pts > VLC_TS_INVALID ? p_block->i_pts : AV_NOPTS_VALUE;
+            pkt.dts = p_block->i_dts > VLC_TS_INVALID ? p_block->i_dts : AV_NOPTS_VALUE;
+        }
+        else
+        {
+            /* Return delayed frames if codec has CODEC_CAP_DELAY */
+            pkt.data = NULL;
+            pkt.size = 0;
+        }
+
+        if( !p_sys->palette_sent )
+        {
+            uint8_t *pal = av_packet_new_side_data(&pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
+            if (pal) {
+                memcpy(pal, p_dec->fmt_in.video.p_palette->palette, AVPALETTE_SIZE);
+                p_sys->palette_sent = true;
+            }
+        }
+
+        /* Make sure we don't reuse the same timestamps twice */
+        if( p_block )
+        {
+            p_block->i_pts =
+            p_block->i_dts = VLC_TS_INVALID;
+        }
+
+        int ret = avcodec_send_packet(p_context, &pkt);
+        if( ret != 0 && ret != AVERROR(EAGAIN) )
+        {
+            if (ret == AVERROR(ENOMEM) || ret == AVERROR(EINVAL))
+            {
+                msg_Err(p_dec, "avcodec_send_packet critical error");
+                *error = true;
+            }
+            av_packet_unref( &pkt );
+            break;
+        }
+        i_used = ret != AVERROR(EAGAIN) ? pkt.size : 0;
+        av_packet_unref( &pkt );
+
+        frame = av_frame_alloc();
+        if (unlikely(frame == NULL))
+        {
+            *error = true;
+            break;
+        }
+
+        ret = avcodec_receive_frame(p_context, frame);
+        if( ret != 0 && ret != AVERROR(EAGAIN) )
+        {
+            msg_Dbg(p_dec, "No receive");
+            if (ret == AVERROR(ENOMEM) || ret == AVERROR(EINVAL))
+            {
+                msg_Err(p_dec, "avcodec_receive_frame critical error");
+                *error = true;
+            }
+            av_frame_free(&frame);
+            /* After draining, we need to reset decoder with a flush */
+            if( ret == AVERROR_EOF )
+                avcodec_flush_buffers( p_sys->p_context );
+            break;
+        }
+        bool not_received_frame = ret;
+
+        wait_mt( p_sys );
+
+        if( eos_spotted )
+            p_sys->b_first_frame = true;
+
+        if( p_block )
+        {
+            if( p_block->i_buffer <= 0 )
+                eos_spotted = false;
+
+            /* Consumed bytes */
+            p_block->p_buffer += i_used;
+            p_block->i_buffer -= i_used;
+        }
+
+        /* Nothing to display */
+        if( not_received_frame )
+        {
+//            msg_Dbg(p_dec, "No rx: used=%d", i_used);
+            av_frame_free(&frame);
+            if( i_used == 0 ) break;
+            continue;
+        }
+
+        /* Compute the PTS */
+#ifdef FF_API_PKT_PTS
+        mtime_t i_pts = frame->pts;
+#else
+        mtime_t i_pts = frame->pkt_pts;
+#endif
+        if (i_pts == AV_NOPTS_VALUE )
+            i_pts = frame->pkt_dts;
+
+        if( i_pts == AV_NOPTS_VALUE )
+            i_pts = date_Get( &p_sys->pts );
+
+        /* Interpolate the next PTS */
+        if( i_pts > VLC_TS_INVALID )
+            date_Set( &p_sys->pts, i_pts );
+
+        const mtime_t i_next_pts = interpolate_next_pts(p_dec, frame);
+
+        update_late_frame_count( p_dec, p_block, current_time, i_pts, i_next_pts);
+
+        if( !b_need_output_picture ||
+//            ( !p_sys->p_va && !frame->linesize[0] ) ||
+           ( !frame->linesize[0] ) ||
+           ( p_dec->b_frame_drop_allowed && (frame->flags & AV_FRAME_FLAG_CORRUPT) &&
+             !p_sys->b_show_corrupted ) )
+        {
+            av_frame_free(&frame);
+//            msg_Dbg(p_dec, "Bad frame");
+            continue;
+        }
+
+        if( p_context->pix_fmt == AV_PIX_FMT_PAL8
+         && !p_dec->fmt_out.video.p_palette )
+        {
+            /* See AV_PIX_FMT_PAL8 comment in avc_GetVideoFormat(): update the
+             * fmt_out palette and change the fmt_out chroma to request a new
+             * vout */
+            assert( p_dec->fmt_out.video.i_chroma != VLC_CODEC_RGBP );
+
+            video_palette_t *p_palette;
+            p_palette = p_dec->fmt_out.video.p_palette
+                      = malloc( sizeof(video_palette_t) );
+            if( !p_palette )
+            {
+                *error = true;
+                av_frame_free(&frame);
+                break;
+            }
+            static_assert( sizeof(p_palette->palette) == AVPALETTE_SIZE,
+                           "Palette size mismatch between vlc and libavutil" );
+            assert( frame->data[1] != NULL );
+            memcpy( p_palette->palette, frame->data[1], AVPALETTE_SIZE );
+            p_palette->i_entries = AVPALETTE_COUNT;
+            p_dec->fmt_out.video.i_chroma = VLC_CODEC_RGBP;
+            if( decoder_UpdateVideoFormat( p_dec ) )
+            {
+                av_frame_free(&frame);
+                continue;
+            }
+        }
+
+#if 1
+        {
+            cma_buf_t * const cb = av_rpi_zc_buf_v(frame->buf[0]);
+
+            if (cb == NULL)
+            {
+                msg_Err(p_dec, "Frame has no attached CMA buffer");
+                goto fail;
+            }
+
+            if (lavc_UpdateVideoFormat(p_dec, p_context, p_context->pix_fmt,
+                                       p_context->pix_fmt) != 0)
+            {
+                msg_Err(p_dec, "Failed to update format");
+                goto fail;
+            }
+
+            if ((p_pic = decoder_NewPicture(p_dec)) == NULL)
+            {
+                msg_Err(p_dec, "Failed to allocate pic");
+                goto fail;
+            }
+
+            if (cma_buf_pic_attach(cma_buf_ref(cb), p_pic) != 0)
+            {
+                cma_buf_unref(cb);  // Undo the in_flight
+                char dbuf0[5];
+                msg_Err(p_dec, "Failed to attach bufs to pic: fmt=%s", str_fourcc(dbuf0, p_pic->format.i_chroma));
+                goto fail;
+            }
+
+            // ****** Set planes etc.
+            set_pic_from_frame(p_pic, frame);
+        }
+#else
+        picture_t *p_pic = frame->opaque;
+        if( p_pic == NULL )
+        {   /* When direct rendering is not used, get_format() and get_buffer()
+             * might not be called. The output video format must be set here
+             * then picture buffer can be allocated. */
+            if (p_sys->p_va == NULL
+             && lavc_UpdateVideoFormat(p_dec, p_context, p_context->pix_fmt,
+                                       p_context->pix_fmt) == 0)
+                p_pic = decoder_NewPicture(p_dec);
+
+            if( !p_pic )
+            {
+                av_frame_free(&frame);
+                break;
+            }
+
+            /* Fill picture_t from AVFrame */
+            if( lavc_CopyPicture( p_dec, p_pic, frame ) != VLC_SUCCESS )
+            {
+                av_frame_free(&frame);
+                picture_Release( p_pic );
+                break;
+            }
+        }
+        else
+        {
+            /* Some codecs can return the same frame multiple times. By the
+             * time that the same frame is returned a second time, it will be
+             * too late to clone the underlying picture. So clone proactively.
+             * A single picture CANNOT be queued multiple times.
+             */
+            p_pic = picture_Clone( p_pic );
+            if( unlikely(p_pic == NULL) )
+            {
+                av_frame_free(&frame);
+                break;
+            }
+        }
+#endif
+
+        if( !p_dec->fmt_in.video.i_sar_num || !p_dec->fmt_in.video.i_sar_den )
+        {
+            /* Fetch again the aspect ratio in case it changed */
+            p_dec->fmt_out.video.i_sar_num
+                = p_context->sample_aspect_ratio.num;
+            p_dec->fmt_out.video.i_sar_den
+                = p_context->sample_aspect_ratio.den;
+
+            if( !p_dec->fmt_out.video.i_sar_num || !p_dec->fmt_out.video.i_sar_den )
+            {
+                p_dec->fmt_out.video.i_sar_num = 1;
+                p_dec->fmt_out.video.i_sar_den = 1;
+            }
+        }
+
+        p_pic->date = i_pts;
+        /* Hack to force display of still pictures */
+        p_pic->b_force = p_sys->b_first_frame;
+        p_pic->i_nb_fields = 2 + frame->repeat_pict;
+        p_pic->b_progressive = !frame->interlaced_frame;
+        p_pic->b_top_field_first = frame->top_field_first;
+
+        if (DecodeSidedata(p_dec, frame, p_pic))
+            i_pts = VLC_TS_INVALID;
+
+        av_frame_free(&frame);
+
+        /* Send decoded frame to vout */
+        if (i_pts > VLC_TS_INVALID)
+        {
+            p_sys->b_first_frame = false;
+#if TRACE_ALL
+            msg_Dbg(p_dec, ">>> %s: Got pic", __func__);
+#endif
+            return p_pic;
+        }
+        else
+            picture_Release( p_pic );
+    }
+
+    if( p_block )
+        block_Release( p_block );
+
+#if TRACE_ALL
+     msg_Dbg(p_dec, ">>> %s: NULL", __func__);
+#endif
+    return NULL;
+
+fail:
+#if TRACE_ALL
+     msg_Dbg(p_dec, ">>> %s: FAIL", __func__);
+#endif
+    av_frame_free(&frame);
+    if (p_pic != NULL)
+        picture_Release(p_pic);
+    if (p_block != NULL)
+        block_Release(p_block);
+    *error = true;
+    return NULL;
+}
+
+static int DecodeVideo( decoder_t *p_dec, block_t *p_block )
+{
+    block_t **pp_block = p_block ? &p_block : NULL;
+    picture_t *p_pic;
+    bool error = false;
+    while( ( p_pic = DecodeBlock( p_dec, pp_block, &error ) ) != NULL )
+        decoder_QueueVideo( p_dec, p_pic );
+    return error ? VLCDEC_ECRITICAL : VLCDEC_SUCCESS;
+}
+
+/*****************************************************************************
+ * EndVideo: decoder destruction
+ *****************************************************************************
+ * This function is called when the thread ends after a successful
+ * initialization.
+ *****************************************************************************/
+static void MmalAvcodecCloseDecoder( vlc_object_t *obj )
+{
+    decoder_t *p_dec = (decoder_t *)obj;
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    AVCodecContext *ctx = p_sys->p_context;
+//    void *hwaccel_context;
+
+    msg_Dbg(obj, "<<< %s", __func__);
+
+    post_mt( p_sys );
+
+    cma_buf_pool_cancel(p_sys->cma_pool);  // Abort any pending frame allocs
+
+    /* do not flush buffers if codec hasn't been opened (theora/vorbis/VC1) */
+    if( avcodec_is_open( ctx ) )
+        avcodec_flush_buffers( ctx );
+
+    av_rpi_zc_uninit2(ctx);
+
+    wait_mt( p_sys );
+
+    cc_Flush( &p_sys->cc );
+
+//    hwaccel_context = ctx->hwaccel_context;
+    avcodec_free_context( &ctx );
+
+//    if( p_sys->p_va )
+//        vlc_va_Delete( p_sys->p_va, &hwaccel_context );
+
+    cma_vcsm_exit(p_sys->vcsm_init_type);
+
+    vlc_sem_destroy( &p_sys->sem_mt );
+    free( p_sys );
+}
+
+/*****************************************************************************
+ * ffmpeg_InitCodec: setup codec extra initialization data for ffmpeg
+ *****************************************************************************/
+static void ffmpeg_InitCodec( decoder_t *p_dec )
+{
+    decoder_sys_t *p_sys = p_dec->p_sys;
+    size_t i_size = p_dec->fmt_in.i_extra;
+
+    if( !i_size ) return;
+
+    if( p_sys->p_codec->id == AV_CODEC_ID_SVQ3 )
+    {
+        uint8_t *p;
+
+        p_sys->p_context->extradata_size = i_size + 12;
+        p = p_sys->p_context->extradata =
+            av_malloc( p_sys->p_context->extradata_size +
+                       FF_INPUT_BUFFER_PADDING_SIZE );
+        if( !p )
+            return;
+
+        memcpy( &p[0],  "SVQ3", 4 );
+        memset( &p[4], 0, 8 );
+        memcpy( &p[12], p_dec->fmt_in.p_extra, i_size );
+
+        /* Now remove all atoms before the SMI one */
+        if( p_sys->p_context->extradata_size > 0x5a &&
+            strncmp( (char*)&p[0x56], "SMI ", 4 ) )
+        {
+            uint8_t *psz = &p[0x52];
+
+            while( psz < &p[p_sys->p_context->extradata_size - 8] )
+            {
+                uint_fast32_t atom_size = GetDWBE( psz );
+                if( atom_size <= 1 )
+                {
+                    /* FIXME handle 1 as long size */
+                    break;
+                }
+                if( !strncmp( (char*)&psz[4], "SMI ", 4 ) )
+                {
+                    memmove( &p[0x52], psz,
+                             &p[p_sys->p_context->extradata_size] - psz );
+                    break;
+                }
+
+                psz += atom_size;
+            }
+        }
+    }
+    else
+    {
+        p_sys->p_context->extradata_size = i_size;
+        p_sys->p_context->extradata =
+            av_malloc( i_size + FF_INPUT_BUFFER_PADDING_SIZE );
+        if( p_sys->p_context->extradata )
+        {
+            memcpy( p_sys->p_context->extradata,
+                    p_dec->fmt_in.p_extra, i_size );
+            memset( p_sys->p_context->extradata + i_size,
+                    0, FF_INPUT_BUFFER_PADDING_SIZE );
+        }
+    }
+}
+
+
+vlc_module_begin()
+    set_category( CAT_INPUT )
+    set_subcategory( SUBCAT_INPUT_VCODEC )
+    set_shortname(N_("MMAL avcodec"))
+    set_description(N_("MMAL buffered avcodec "))
+    set_capability("video decoder", 80)
+    add_shortcut("mmal_avcodec")
+    add_integer(MMAL_AVCODEC_BUFFERS, -1, MMAL_AVCODEC_BUFFERS_TEXT,
+                    MMAL_AVCODEC_BUFFERS_LONGTEXT, true)
+    set_callbacks(MmalAvcodecOpenDecoder, MmalAvcodecCloseDecoder)
+vlc_module_end()
+
--- /dev/null
+++ b/modules/hw/mmal/mmal_cma.c
@@ -0,0 +1,668 @@
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdatomic.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <interface/vcsm/user-vcsm.h>
+
+#include <vlc_common.h>
+#include <vlc_picture.h>
+
+#include "mmal_cma.h"
+#include "mmal_picture.h"
+
+#include <assert.h>
+
+#define TRACE_ALL 0
+
+//-----------------------------------------------------------------------------
+//
+// Generic pool functions
+// Knows nothing about pool entries
+
+typedef void * cma_pool_alloc_fn(void * v, size_t size);
+typedef void cma_pool_free_fn(void * v, void * el, size_t size);
+
+#if TRACE_ALL
+static atomic_int pool_seq;
+#endif
+
+// Pool structure
+// Ref count is held by pool owner and pool els that have been got
+// Els in the pool do not count towards its ref count
+struct cma_pool_fixed_s
+{
+    atomic_int ref_count;
+
+    vlc_mutex_t lock;
+    unsigned int n_in;
+    unsigned int n_out;
+    unsigned int pool_size;
+    int flight_size;
+    size_t el_size;
+    void ** pool;
+
+    bool cancel;
+    int in_flight;
+    vlc_cond_t flight_cond;
+
+    void * alloc_v;
+    cma_pool_alloc_fn * el_alloc_fn;
+    cma_pool_free_fn * el_free_fn;
+    cma_pool_on_delete_fn * on_delete_fn;
+
+    const char * name;
+#if TRACE_ALL
+    int seq;
+#endif
+};
+
+static inline unsigned int inc_mod(const unsigned int n, const unsigned int m)
+{
+    return n + 1 >= m ? 0 : n + 1;
+}
+
+static void free_pool(const cma_pool_fixed_t * const p, void ** const pool,
+                      const unsigned int pool_size, const size_t el_size)
+{
+    if (pool == NULL)
+        return;
+
+    for (unsigned int n = 0; n != pool_size; ++n)
+        if (pool[n] != NULL)
+            p->el_free_fn(p->alloc_v, pool[n], el_size);
+    free(pool);
+}
+
+// Just kill this - no checks
+static void cma_pool_fixed_delete(cma_pool_fixed_t * const p)
+{
+    cma_pool_on_delete_fn *const on_delete_fn = p->on_delete_fn;
+    void *const v = p->alloc_v;
+
+    free_pool(p, p->pool, p->pool_size, p->el_size);
+
+    if (p->name != NULL)
+        free((void *)p->name);  // Discard const
+
+    vlc_cond_destroy(&p->flight_cond);
+    vlc_mutex_destroy(&p->lock);
+    free(p);
+
+    // Inform our container that we are dead (if it cares)
+    if (on_delete_fn)
+        on_delete_fn(v);
+}
+
+static void cma_pool_fixed_unref(cma_pool_fixed_t * const p)
+{
+    if (atomic_fetch_sub(&p->ref_count, 1) <= 1)
+        cma_pool_fixed_delete(p);
+}
+
+static void cma_pool_fixed_ref(cma_pool_fixed_t * const p)
+{
+    atomic_fetch_add(&p->ref_count, 1);
+}
+
+static void cma_pool_fixed_inc_in_flight(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    ++p->in_flight;
+    vlc_mutex_unlock(&p->lock);
+}
+
+static void cma_pool_fixed_dec_in_flight(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    if (--p->in_flight == 0)
+        vlc_cond_signal(&p->flight_cond);
+    vlc_mutex_unlock(&p->lock);
+}
+
+static void * cma_pool_fixed_get(cma_pool_fixed_t * const p, const size_t req_el_size, const bool inc_flight, const bool no_pool)
+{
+    void * v = NULL;
+
+    vlc_mutex_lock(&p->lock);
+
+    for (;;)
+    {
+        if (req_el_size != p->el_size)
+        {
+            void ** const deadpool = p->pool;
+            const size_t dead_size = p->el_size;
+            const unsigned int dead_n = p->pool_size;
+
+            p->pool = NULL;
+            p->n_in = 0;
+            p->n_out = 0;
+            p->el_size = req_el_size;
+
+            if (deadpool != NULL)
+            {
+                vlc_mutex_unlock(&p->lock);
+                // Do the free old op outside the mutex in case the free is slow
+                free_pool(p, deadpool, dead_n, dead_size);
+                vlc_mutex_lock(&p->lock);
+                continue;
+            }
+        }
+
+        // Late abort if flush or cancel so we can still kill the pool
+        if (req_el_size == 0 || p->cancel)
+        {
+            vlc_mutex_unlock(&p->lock);
+            return NULL;
+        }
+
+        if (p->pool != NULL && !no_pool)
+        {
+            v = p->pool[p->n_in];
+            if (v != NULL)
+            {
+                p->pool[p->n_in] = NULL;
+                p->n_in = inc_mod(p->n_in, p->pool_size);
+                break;
+            }
+        }
+
+        if (p->in_flight <= 0)
+            break;
+
+        vlc_cond_wait(&p->flight_cond, &p->lock);
+    }
+
+    if (inc_flight)
+        ++p->in_flight;
+
+    vlc_mutex_unlock(&p->lock);
+
+    if (v == NULL && req_el_size != 0)
+        v = p->el_alloc_fn(p->alloc_v, req_el_size);
+
+    // Tag ref
+    if (v != NULL)
+        cma_pool_fixed_ref(p);
+    // Remove flight if we set it and error
+    else if (inc_flight)
+        cma_pool_fixed_dec_in_flight(p);
+
+    return v;
+}
+
+static void cma_pool_fixed_put(cma_pool_fixed_t * const p, void * v, const size_t el_size, const bool was_in_flight)
+{
+    vlc_mutex_lock(&p->lock);
+
+    if (el_size == p->el_size && (p->pool == NULL || p->pool[p->n_out] == NULL))
+    {
+        if (p->pool == NULL)
+            p->pool = calloc(p->pool_size, sizeof(void*));
+
+        p->pool[p->n_out] = v;
+        p->n_out = inc_mod(p->n_out, p->pool_size);
+        v = NULL;
+    }
+
+    if (was_in_flight)
+        --p->in_flight;
+
+    vlc_mutex_unlock(&p->lock);
+
+    vlc_cond_signal(&p->flight_cond);
+
+    if (v != NULL)
+        p->el_free_fn(p->alloc_v, v, el_size);
+
+    cma_pool_fixed_unref(p);
+}
+
+static int cma_pool_fixed_resize(cma_pool_fixed_t * const p,
+                           const unsigned int new_pool_size, const int new_flight_size)
+{
+    void ** dead_pool = NULL;
+    size_t dead_size = 0;
+    unsigned int dead_n = 0;
+
+    // This makes this non-reentrant but saves us a lot of time in the normal
+    // "nothing happens" case
+    if (p->pool_size == new_pool_size && p->flight_size == new_flight_size)
+        return 0;
+
+    vlc_mutex_lock(&p->lock);
+
+    if (p->pool != NULL && new_pool_size != p->pool_size)
+    {
+        void ** const new_pool = calloc(new_pool_size, sizeof(void*));
+        unsigned int d, s;
+        dead_pool = p->pool;
+        dead_size = p->el_size;
+        dead_n = p->pool_size;
+
+        if (new_pool == NULL)
+        {
+            vlc_mutex_unlock(&p->lock);
+            return -1;
+        }
+
+        for (d = 0, s = p->n_in; d != new_pool_size && (new_pool[d] = dead_pool[s]) != NULL; ++d, s = inc_mod(s, dead_n))
+            dead_pool[s] = NULL;
+
+        p->n_out = 0;
+        p->n_in = (d != new_pool_size) ? d : 0;
+        p->pool = new_pool;
+    }
+
+    p->pool_size = new_pool_size;
+    if (new_flight_size > p->flight_size)
+        vlc_cond_broadcast(&p->flight_cond);  // Lock still active so nothing happens till we release it
+    p->in_flight += p->flight_size - new_flight_size;
+    p->flight_size = new_flight_size;
+
+    vlc_mutex_unlock(&p->lock);
+
+    free_pool(p, dead_pool, dead_n, dead_size);
+    return 0;
+}
+
+static int cma_pool_fixed_fill(cma_pool_fixed_t * const p, const size_t el_size)
+{
+    for (;;)
+    {
+        vlc_mutex_lock(&p->lock);
+        bool done = el_size == p->el_size && p->pool != NULL && p->pool[p->n_out] != NULL;
+        vlc_mutex_unlock(&p->lock);
+        if (done)
+            break;
+        void * buf = cma_pool_fixed_get(p, el_size, false, true);
+        if (buf == NULL)
+            return -ENOMEM;
+        cma_pool_fixed_put(p, buf, el_size, false);
+    }
+    return 0;
+}
+
+static void cma_pool_fixed_cancel(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    p->cancel = true;
+    vlc_cond_broadcast(&p->flight_cond);
+    vlc_mutex_unlock(&p->lock);
+}
+
+static void cma_pool_fixed_uncancel(cma_pool_fixed_t * const p)
+{
+    vlc_mutex_lock(&p->lock);
+    p->cancel = false;
+    vlc_mutex_unlock(&p->lock);
+}
+
+
+// Purge pool & unref
+static void cma_pool_fixed_kill(cma_pool_fixed_t * const p)
+{
+    if (p == NULL)
+        return;
+
+    // This flush is not strictly needed but it reclaims what memory we can reclaim asap
+    cma_pool_fixed_get(p, 0, false, false);
+    cma_pool_fixed_unref(p);
+}
+
+// Create a new pool
+static cma_pool_fixed_t*
+cma_pool_fixed_new(const unsigned int pool_size,
+                   const int flight_size,
+                   void * const alloc_v,
+                   cma_pool_alloc_fn * const alloc_fn, cma_pool_free_fn * const free_fn,
+                   cma_pool_on_delete_fn * const on_delete_fn,
+                   const char * const name)
+{
+    cma_pool_fixed_t* const p = calloc(1, sizeof(cma_pool_fixed_t));
+    if (p == NULL)
+        return NULL;
+
+    atomic_store(&p->ref_count, 1);
+    vlc_mutex_init(&p->lock);
+    vlc_cond_init(&p->flight_cond);
+
+    p->pool_size = pool_size;
+    p->flight_size = flight_size;
+    p->in_flight = -flight_size;
+
+    p->alloc_v = alloc_v;
+    p->el_alloc_fn = alloc_fn;
+    p->el_free_fn = free_fn;
+    p->on_delete_fn = on_delete_fn;
+    p->name = name == NULL ? NULL : strdup(name);
+#if TRACE_ALL
+    p->seq = atomic_fetch_add(&pool_seq, 1);
+#endif
+
+    return p;
+}
+
+// ---------------------------------------------------------------------------
+//
+// CMA buffer functions - uses cma_pool_fixed for pooling
+
+struct cma_buf_pool_s {
+    cma_pool_fixed_t * pool;
+    vcsm_init_type_t init_type;
+
+    bool all_in_flight;
+#if TRACE_ALL
+    size_t alloc_n;
+    size_t alloc_size;
+#endif
+};
+
+typedef struct cma_buf_s {
+    atomic_int ref_count;
+    cma_buf_pool_t * cbp;
+    bool in_flight;
+    size_t size;
+    unsigned int vcsm_h;   // VCSM handle from initial alloc
+    unsigned int vc_h;     // VC handle for ZC mmal buffers
+    unsigned int vc_addr;  // VC addr - unused by us but wanted by FFmpeg
+    int fd;                // dmabuf handle for GL
+    void * mmap;           // ARM mapped address
+    picture_context_t *ctx2;
+} cma_buf_t;
+
+static void cma_pool_delete(cma_buf_t * const cb)
+{
+    assert(atomic_load(&cb->ref_count) == 0);
+#if TRACE_ALL
+    cb->cbp->alloc_size -= cb->size;
+    --cb->cbp->alloc_n;
+    fprintf(stderr, "%s[%d:%s]: N=%d, Total=%d\n", __func__, cb->cbp->pool->seq, cb->cbp->pool->name, cb->cbp->alloc_n, cb->cbp->alloc_size);
+#endif
+
+    if (cb->ctx2 != NULL)
+        cb->ctx2->destroy(cb->ctx2);
+
+    if (cb->mmap != MAP_FAILED)
+    {
+        if (cb->cbp->init_type == VCSM_INIT_CMA)
+            munmap(cb->mmap, cb->size);
+        else
+            vcsm_unlock_hdl(cb->vcsm_h);
+    }
+    if (cb->fd != -1)
+        close(cb->fd);
+    if (cb->vcsm_h != 0)
+        vcsm_free(cb->vcsm_h);
+    free(cb);
+}
+
+static void cma_pool_free_cb(void * v, void * el, size_t size)
+{
+    VLC_UNUSED(v);
+    VLC_UNUSED(size);
+
+    cma_pool_delete(el);
+}
+
+static void * cma_pool_alloc_cb(void * v, size_t size)
+{
+    cma_buf_pool_t * const cbp = v;
+
+    cma_buf_t * const cb = malloc(sizeof(cma_buf_t));
+    if (cb == NULL)
+        return NULL;
+
+    *cb = (cma_buf_t){
+        .ref_count = ATOMIC_VAR_INIT(0),
+        .cbp = cbp,
+        .in_flight = 0,
+        .size = size,
+        .vcsm_h = 0,
+        .vc_h = 0,
+        .fd = -1,
+        .mmap = MAP_FAILED,
+        .ctx2 = NULL
+    };
+#if TRACE_ALL
+    cb->cbp->alloc_size += cb->size;
+    ++cb->cbp->alloc_n;
+    fprintf(stderr, "%s[%d:%s]: N=%d, Total=%d\n", __func__, cbp->pool->seq, cbp->pool->name, cbp->alloc_n, cbp->alloc_size);
+#endif
+
+    // 0x80 is magic value to force full ARM-side mapping - otherwise
+    // cache requests can cause kernel crashes
+    if ((cb->vcsm_h = vcsm_malloc_cache(size, VCSM_CACHE_TYPE_HOST | 0x80, "VLC frame")) == 0)
+    {
+#if TRACE_ALL
+        fprintf(stderr, "vcsm_malloc_cache fail\n");
+#endif
+        goto fail;
+    }
+
+    if ((cb->vc_h = vcsm_vc_hdl_from_hdl(cb->vcsm_h)) == 0)
+    {
+#if TRACE_ALL
+        fprintf(stderr, "vcsm_vc_hdl_from_hdl fail\n");
+#endif
+        goto fail;
+    }
+
+    if (cbp->init_type == VCSM_INIT_CMA)
+    {
+        if ((cb->fd = vcsm_export_dmabuf(cb->vcsm_h)) == -1)
+        {
+#if TRACE_ALL
+            fprintf(stderr, "vcsm_export_dmabuf fail\n");
+#endif
+            goto fail;
+        }
+
+        if ((cb->mmap = mmap(NULL, cb->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, cb->fd, 0)) == MAP_FAILED)
+            goto fail;
+    }
+    else
+    {
+        void * arm_addr;
+        if ((arm_addr = vcsm_lock(cb->vcsm_h)) == NULL)
+        {
+#if TRACE_ALL
+            fprintf(stderr, "vcsm_lock fail\n");
+#endif
+            goto fail;
+        }
+        cb->mmap = arm_addr;
+    }
+
+    cb->vc_addr = vcsm_vc_addr_from_hdl(cb->vcsm_h);
+
+    return cb;
+
+fail:
+    cma_pool_delete(cb);
+    return NULL;
+}
+
+// Pool has died - safe now to exit vcsm
+static void cma_buf_pool_on_delete_cb(void * v)
+{
+    cma_buf_pool_t * const cbp = v;
+
+    cma_vcsm_exit(cbp->init_type);
+    free(cbp);
+}
+
+void cma_buf_pool_cancel(cma_buf_pool_t * const cbp)
+{
+    if (cbp == NULL || cbp->pool == NULL)
+        return;
+
+    cma_pool_fixed_cancel(cbp->pool);
+}
+
+void cma_buf_pool_uncancel(cma_buf_pool_t * const cbp)
+{
+    if (cbp == NULL || cbp->pool == NULL)
+        return;
+
+    cma_pool_fixed_uncancel(cbp->pool);
+}
+
+// User finished with pool
+void cma_buf_pool_delete(cma_buf_pool_t * const cbp)
+{
+    if (cbp == NULL)
+        return;
+
+    if (cbp->pool != NULL)
+    {
+        // We will call cma_buf_pool_on_delete_cb when the pool finally dies
+        // (might be now) which will free up our env.
+        cma_pool_fixed_kill(cbp->pool);
+    }
+    else
+    {
+        // Had no pool for some reason (error) but must still finish cleanup
+        cma_buf_pool_on_delete_cb(cbp);
+    }
+}
+
+int cma_buf_pool_fill(cma_buf_pool_t * const cbp, const size_t el_size)
+{
+    return cma_pool_fixed_fill(cbp->pool, el_size);
+}
+
+int cma_buf_pool_resize(cma_buf_pool_t * const cbp,
+                        const unsigned int new_pool_size, const int new_flight_size)
+{
+    return cma_pool_fixed_resize(cbp->pool, new_pool_size, new_flight_size);
+}
+
+cma_buf_pool_t * cma_buf_pool_new(const unsigned int pool_size, const unsigned int flight_size, const bool all_in_flight, const char * const name)
+{
+    vcsm_init_type_t const init_type = cma_vcsm_init();
+    if (init_type == VCSM_INIT_NONE)
+        return NULL;
+
+    cma_buf_pool_t * const cbp = calloc(1, sizeof(cma_buf_pool_t));
+    if (cbp == NULL)
+        return NULL;
+
+    cbp->init_type = init_type;
+    cbp->all_in_flight = all_in_flight;
+
+    if ((cbp->pool = cma_pool_fixed_new(pool_size, flight_size, cbp, cma_pool_alloc_cb, cma_pool_free_cb, cma_buf_pool_on_delete_cb, name)) == NULL)
+        goto fail;
+    return cbp;
+
+fail:
+    cma_buf_pool_delete(cbp);
+    return NULL;
+}
+
+
+void cma_buf_in_flight(cma_buf_t * const cb)
+{
+    if (!cb->cbp->all_in_flight)
+    {
+        assert(!cb->in_flight);
+        cb->in_flight = true;
+        cma_pool_fixed_inc_in_flight(cb->cbp->pool);
+    }
+}
+
+void cma_buf_end_flight(cma_buf_t * const cb)
+{
+    if (cb != NULL && !cb->cbp->all_in_flight && cb->in_flight)
+    {
+        cb->in_flight = false;
+        cma_pool_fixed_dec_in_flight(cb->cbp->pool);
+    }
+}
+
+
+// Return vcsm handle
+unsigned int cma_buf_vcsm_handle(const cma_buf_t * const cb)
+{
+    return cb->vcsm_h;
+}
+
+size_t cma_buf_size(const cma_buf_t * const cb)
+{
+    return cb->size;
+}
+
+int cma_buf_add_context2(cma_buf_t *const cb, picture_context_t * const ctx2)
+{
+    if (cb->ctx2 != NULL)
+        return VLC_EGENERIC;
+
+    cb->ctx2 = ctx2;
+    return VLC_SUCCESS;
+}
+
+unsigned int cma_buf_vc_handle(const cma_buf_t *const cb)
+{
+    return cb->vc_h;
+}
+
+int cma_buf_fd(const cma_buf_t *const cb)
+{
+    return cb->fd;
+}
+
+void * cma_buf_addr(const cma_buf_t *const cb)
+{
+    return cb->mmap;
+}
+
+unsigned int cma_buf_vc_addr(const cma_buf_t *const cb)
+{
+    return cb->vc_addr;
+}
+
+
+picture_context_t * cma_buf_context2(const cma_buf_t *const cb)
+{
+    return cb->ctx2;
+}
+
+
+void cma_buf_unref(cma_buf_t * const cb)
+{
+    if (cb == NULL)
+        return;
+    if (atomic_fetch_sub(&cb->ref_count, 1) <= 1)
+    {
+        const bool was_in_flight = cb->in_flight;
+        cb->in_flight = false;
+        cma_pool_fixed_put(cb->cbp->pool, cb, cb->size, was_in_flight);
+    }
+}
+
+cma_buf_t * cma_buf_ref(cma_buf_t * const cb)
+{
+    if (cb == NULL)
+        return NULL;
+    atomic_fetch_add(&cb->ref_count, 1);
+    return cb;
+}
+
+cma_buf_t * cma_buf_pool_alloc_buf(cma_buf_pool_t * const cbp, const size_t size)
+{
+    cma_buf_t *const cb = cma_pool_fixed_get(cbp->pool, size, cbp->all_in_flight, false);
+
+    if (cb == NULL)
+        return NULL;
+
+    cb->in_flight = cbp->all_in_flight;
+    // When 1st allocated or retrieved from the pool the block will have a
+    // ref count of 0 so ref here
+    return cma_buf_ref(cb);
+}
+
--- /dev/null
+++ b/modules/hw/mmal/mmal_cma.h
@@ -0,0 +1,71 @@
+#ifndef VLC_MMAL_MMAL_CMA_H_
+#define VLC_MMAL_MMAL_CMA_H_
+
+
+struct cma_pool_fixed_s;
+typedef struct cma_pool_fixed_s cma_pool_fixed_t;
+
+typedef void * cma_pool_alloc_fn(void * v, size_t size);
+typedef void cma_pool_free_fn(void * v, void * el, size_t size);
+typedef void cma_pool_on_delete_fn(void * v);
+
+#if 0
+void cma_pool_fixed_unref(cma_pool_fixed_t * const p);
+void cma_pool_fixed_ref(cma_pool_fixed_t * const p);
+void * cma_pool_fixed_get(cma_pool_fixed_t * const p, const size_t req_el_size, const bool in_flight);
+void cma_pool_fixed_put(cma_pool_fixed_t * const p, void * v, const size_t el_size, const bool was_in_flight);
+void cma_pool_fixed_inc_in_flight(cma_pool_fixed_t * const p);
+void cma_pool_fixed_dec_in_flight(cma_pool_fixed_t * const p);
+void cma_pool_fixed_cancel(cma_pool_fixed_t * const p);
+void cma_pool_fixed_uncancel(cma_pool_fixed_t * const p);
+void cma_pool_fixed_kill(cma_pool_fixed_t * const p);
+int cma_pool_fixed_resize(cma_pool_fixed_t * const p,
+                          const unsigned int new_pool_size, const int new_flight_size);
+cma_pool_fixed_t * cma_pool_fixed_new(const unsigned int pool_size,
+                   const int flight_size,
+                   void * const alloc_v,
+                   cma_pool_alloc_fn * const alloc_fn, cma_pool_free_fn * const free_fn,
+                   cma_pool_on_delete_fn * const on_delete_fn,
+                   const char * const name);
+#endif
+
+struct cma_buf_s;
+typedef struct cma_buf_s cma_buf_t;
+
+void cma_buf_in_flight(cma_buf_t * const cb);
+void cma_buf_end_flight(cma_buf_t * const cb);
+unsigned int cma_buf_vcsm_handle(const cma_buf_t * const cb);
+size_t cma_buf_size(const cma_buf_t * const cb);
+int cma_buf_add_context2(cma_buf_t *const cb, picture_context_t * const ctx2);
+unsigned int cma_buf_vc_handle(const cma_buf_t *const cb);
+int cma_buf_fd(const cma_buf_t *const cb);
+void * cma_buf_addr(const cma_buf_t *const cb);
+unsigned int cma_buf_vc_addr(const cma_buf_t *const cb);
+picture_context_t * cma_buf_context2(const cma_buf_t *const cb);
+
+void cma_buf_unref(cma_buf_t * const cb);
+cma_buf_t * cma_buf_ref(cma_buf_t * const cb);
+
+struct cma_buf_pool_s;
+typedef struct cma_buf_pool_s cma_buf_pool_t;
+
+cma_buf_t * cma_buf_pool_alloc_buf(cma_buf_pool_t * const p, const size_t size);
+void cma_buf_pool_cancel(cma_buf_pool_t * const cbp);
+void cma_buf_pool_uncancel(cma_buf_pool_t * const cbp);
+void cma_buf_pool_delete(cma_buf_pool_t * const p);
+int cma_buf_pool_fill(cma_buf_pool_t * const cbp, const size_t el_size);
+int cma_buf_pool_resize(cma_buf_pool_t * const cbp,
+                          const unsigned int new_pool_size, const int new_flight_size);
+cma_buf_pool_t * cma_buf_pool_new(const unsigned int pool_size, const unsigned int flight_size,
+                                  const bool all_in_flight, const char * const name);
+
+static inline void cma_buf_pool_deletez(cma_buf_pool_t ** const pp)
+{
+    cma_buf_pool_t * const p = *pp;
+    if (p != NULL) {
+        *pp = NULL;
+        cma_buf_pool_delete(p);
+    }
+}
+
+#endif // VLC_MMAL_MMAL_CMA_H_
--- /dev/null
+++ b/modules/hw/mmal/mmal_gl.h
@@ -0,0 +1,45 @@
+// Trim this include list!
+
+#include <libdrm/drm.h>
+#include <libdrm/drm_mode.h>
+#include <libdrm/drm_fourcc.h>
+//#include <xf86drm.h>
+//#include <xf86drmMode.h>
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include <X11/Xlib-xcb.h>
+#include <epoxy/gl.h>
+#include <epoxy/egl.h>
+#include <xcb/xcb.h>
+#include <xcb/dri3.h>
+
+struct mmal_gl_converter_s;
+
+typedef struct cma_buf_s {
+    struct mmal_gl_converter_s * sys;
+
+    size_t size;
+    __u32 h_dumb;
+    int fd;
+    unsigned int h_vcsm;
+    void * mapped_addr;
+    GLuint texture;
+} cma_buf_t;
+
+typedef struct cma_pic_sys_s {
+    cma_buf_t * cmabuf;
+} cma_pic_sys_t;
+
+static inline unsigned int
+hw_mmal_h_vcsm(const picture_t * const pic)
+{
+    const cma_pic_sys_t *const pic_sys = (cma_pic_sys_t *)pic->p_sys;
+
+    if (pic->format.i_chroma != VLC_CODEC_MMAL_GL_RGB32 ||
+        pic_sys == NULL || pic_sys->cmabuf == NULL) {
+        return 0;
+    }
+
+    return pic_sys->cmabuf->h_vcsm;
+}
+
--- /dev/null
+++ b/modules/hw/mmal/mmal_piccpy_neon.S
@@ -0,0 +1,105 @@
+// Copy pix
+
+        .syntax unified
+        .arm
+//      .thumb
+        .text
+        .align 16
+        .arch armv7-a
+        .fpu neon-vfpv4
+
+
+.macro  function name
+        .global  \name
+#ifdef __ELF__
+        .type    \name, %function
+#endif
+\name:
+.endm
+
+
+.macro  piccpy_to_8, bit_depth
+        subs     r2, #128
+        vpush    {q4-q7}
+        blt      2f
+1:
+        vldm     r1!, {q0-q7}
+        subs     r2, #128
+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
+        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
+        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
+        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
+        vldm     r1!, {q8-q15}
+        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
+        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
+        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
+        vqrshrn.u16 d7,  q7,  #\bit_depth - 8
+        vqrshrn.u16 d8,  q8,  #\bit_depth - 8
+        vqrshrn.u16 d9,  q9,  #\bit_depth - 8
+        vqrshrn.u16 d10, q10, #\bit_depth - 8
+        vqrshrn.u16 d11, q11, #\bit_depth - 8
+        vqrshrn.u16 d12, q12, #\bit_depth - 8
+        vqrshrn.u16 d13, q13, #\bit_depth - 8
+        vqrshrn.u16 d14, q14, #\bit_depth - 8
+        vqrshrn.u16 d15, q15, #\bit_depth - 8
+        vstm     r0!, {q0-q7}
+        bge      1b
+2:
+        adds     r2, #64
+        blt      1f
+
+        vldm     r1!, {q0-q7}
+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
+        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
+        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
+        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
+        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
+        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
+        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
+        vqrshrn.u16 d7,  q7,  #\bit_depth - 8
+        vstm     r0!, {q0-q3}
+1:
+        adds     r2, #32
+        blt      1f
+
+        vldm     r1!, {q0-q3}
+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
+        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
+        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
+        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
+        vstm     r0!, {q0-q1}
+1:
+        adds     r2, #16
+        blt      1f
+
+        vldm     r1!, {q0-q1}
+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
+        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
+        vstm     r0!, {q0}
+1:
+        adds     r2, #8
+        blt      1f
+
+        vldm     r1!, {q0}
+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
+        vstr     d0, [r0]
+        add      r0, #8
+1:
+        adds     r2, #4
+        blt      1f
+
+        vldr     d0, [r1]
+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
+        vstr     s0, [r0]
+1:
+        vpop     {q4-q7}
+        bx       lr
+.endm
+
+
+@ [r0] Dest
+@ [r1] Src
+@  r2  Pels
+function mmal_piccpy_10_to_8_neon
+        piccpy_to_8 10
+
--- a/modules/hw/mmal/mmal_picture.c
+++ b/modules/hw/mmal/mmal_picture.c
@@ -21,25 +21,1509 @@
  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
  *****************************************************************************/
 
+// We would really like to use vlc_thread.h but the detach thread stuff can't be
+// used here :-(
+#include <pthread.h>
+
+#include <stdatomic.h>
+#include <unistd.h>
+#include <fcntl.h>
+
 #include <vlc_common.h>
+#include <vlc_cpu.h>
 #include <vlc_picture.h>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wbad-function-cast"
+#include <bcm_host.h>
+#pragma GCC diagnostic pop
 #include <interface/mmal/mmal.h>
+#include <interface/mmal/util/mmal_util.h>
+#include <interface/mmal/util/mmal_default_components.h>
+#include <interface/vmcs_host/vcgencmd.h>
+#include <interface/vcsm/user-vcsm.h>
 
+#include "mmal_cma.h"
 #include "mmal_picture.h"
 
-int mmal_picture_lock(picture_t *picture)
+#define UINT64_SIZE(s) (((s) + sizeof(uint64_t) - 1)/sizeof(uint64_t))
+
+static inline char safe_char(const unsigned int c0)
+{
+    const unsigned int c = c0 & 0xff;
+    return c > ' ' && c < 0x7f ? c : '.';
+}
+
+const char * str_fourcc(char * const buf, const unsigned int fcc)
+{
+    if (fcc == 0)
+        return "----";
+    buf[0] = safe_char(fcc >> 0);
+    buf[1] = safe_char(fcc >> 8);
+    buf[2] = safe_char(fcc >> 16);
+    buf[3] = safe_char(fcc >> 24);
+    buf[4] = 0;
+    return buf;
+}
+
+// WB + Inv
+static inline void flush_range(void * const start, const size_t len)
+{
+    uint64_t buf[UINT64_SIZE(sizeof(struct vcsm_user_clean_invalid2_s) + sizeof(struct vcsm_user_clean_invalid2_block_s))];
+    struct vcsm_user_clean_invalid2_s * const b = (struct vcsm_user_clean_invalid2_s *)buf;
+
+    *b = (struct vcsm_user_clean_invalid2_s){
+        .op_count = 1
+    };
+
+    b->s[0] = (struct vcsm_user_clean_invalid2_block_s){
+        .invalidate_mode = 3,   // wb + invalidate
+        .block_count = 1,
+        .start_address = start, // Rely on clean inv to fix up align & size boundries
+        .block_size = len,
+        .inter_block_stride = 0
+    };
+
+    vcsm_clean_invalid2(b);
+}
+
+MMAL_FOURCC_T vlc_to_mmal_color_space(const video_color_space_t vlc_cs)
+{
+    switch (vlc_cs)
+    {
+        case COLOR_SPACE_BT601:
+            return MMAL_COLOR_SPACE_ITUR_BT601;
+        case COLOR_SPACE_BT709:
+            return MMAL_COLOR_SPACE_ITUR_BT709;
+        default:
+            break;
+    }
+    return MMAL_COLOR_SPACE_UNKNOWN;
+}
+
+MMAL_FOURCC_T vlc_to_mmal_video_fourcc(const video_frame_format_t * const vf_vlc)
+{
+    switch (vf_vlc->i_chroma) {
+        case VLC_CODEC_MMAL_ZC_RGB32:
+        case VLC_CODEC_RGB32:
+        {
+            // VLC RGB32 aka RV32 means we have to look at the mask values
+            const uint32_t r = vf_vlc->i_rmask;
+            const uint32_t g = vf_vlc->i_gmask;
+            const uint32_t b = vf_vlc->i_bmask;
+            if (r == 0xff0000 && g == 0xff00 && b == 0xff)
+                return MMAL_ENCODING_BGRA;
+            if (r == 0xff && g == 0xff00 && b == 0xff0000)
+                return MMAL_ENCODING_RGBA;
+            if (r == 0xff000000 && g == 0xff0000 && b == 0xff00)
+                return MMAL_ENCODING_ABGR;
+            if (r == 0xff00 && g == 0xff0000 && b == 0xff000000)
+                return MMAL_ENCODING_ARGB;
+            break;
+        }
+        case VLC_CODEC_RGB16:
+        {
+            // VLC RGB16 aka RV16 means we have to look at the mask values
+            const uint32_t r = vf_vlc->i_rmask;
+            const uint32_t g = vf_vlc->i_gmask;
+            const uint32_t b = vf_vlc->i_bmask;
+            if (r == 0xf800 && g == 0x7e0 && b == 0x1f)
+                return MMAL_ENCODING_RGB16;
+            break;
+        }
+        case VLC_CODEC_I420:
+        case VLC_CODEC_MMAL_ZC_I420:
+            return MMAL_ENCODING_I420;
+        case VLC_CODEC_RGBA:
+            return MMAL_ENCODING_RGBA;
+        case VLC_CODEC_BGRA:
+            return MMAL_ENCODING_BGRA;
+        case VLC_CODEC_ARGB:
+            return MMAL_ENCODING_ARGB;
+        // VLC_CODEC_ABGR does not exist in VLC
+        case VLC_CODEC_MMAL_OPAQUE:
+            return MMAL_ENCODING_OPAQUE;
+        case VLC_CODEC_MMAL_ZC_SAND8:
+            return MMAL_ENCODING_YUVUV128;
+        case VLC_CODEC_MMAL_ZC_SAND10:
+            return MMAL_ENCODING_YUVUV64_10;
+        case VLC_CODEC_MMAL_ZC_SAND30:
+            return MMAL_ENCODING_YUV10_COL;
+        default:
+            break;
+    }
+    return 0;
+}
+
+static void vlc_fmt_to_video_format(MMAL_VIDEO_FORMAT_T *const vf_mmal, const video_frame_format_t * const vf_vlc)
+{
+    const unsigned int wmask = (vf_vlc->i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
+                                vf_vlc->i_chroma == VLC_CODEC_I420) ? 31 : 15;
+
+    vf_mmal->width          = (vf_vlc->i_width + wmask) & ~wmask;
+    vf_mmal->height         = (vf_vlc->i_height + 15) & ~15;
+    vf_mmal->crop.x         = vf_vlc->i_x_offset;
+    vf_mmal->crop.y         = vf_vlc->i_y_offset;
+    vf_mmal->crop.width     = vf_vlc->i_visible_width;
+    vf_mmal->crop.height    = vf_vlc->i_visible_height;
+    if (vf_vlc->i_sar_num == 0 || vf_vlc->i_sar_den == 0) {
+        vf_mmal->par.num        = 1;
+        vf_mmal->par.den        = 1;
+    } else {
+        vf_mmal->par.num        = vf_vlc->i_sar_num;
+        vf_mmal->par.den        = vf_vlc->i_sar_den;
+    }
+    vf_mmal->frame_rate.num = vf_vlc->i_frame_rate;
+    vf_mmal->frame_rate.den = vf_vlc->i_frame_rate_base;
+    vf_mmal->color_space    = vlc_to_mmal_color_space(vf_vlc->space);
+}
+
+
+void hw_mmal_vlc_fmt_to_mmal_fmt(MMAL_ES_FORMAT_T *const es_fmt, const video_frame_format_t * const vf_vlc)
+{
+    vlc_fmt_to_video_format(&es_fmt->es->video, vf_vlc);
+}
+
+bool hw_mmal_vlc_pic_to_mmal_fmt_update(MMAL_ES_FORMAT_T *const es_fmt, const picture_t * const pic)
+{
+    MMAL_VIDEO_FORMAT_T vf_new_ss;
+    MMAL_VIDEO_FORMAT_T *const vf_old = &es_fmt->es->video;
+    MMAL_VIDEO_FORMAT_T *const vf_new = &vf_new_ss;
+
+    vlc_fmt_to_video_format(vf_new, &pic->format);
+
+    // If we have a format that might have come from ffmpeg then rework for
+    // a better guess as to layout. All sand stuff is "special" with regards to
+    // width/height vs real layout so leave as is if that
+    if ((pic->format.i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
+         pic->format.i_chroma == VLC_CODEC_MMAL_ZC_RGB32) &&
+        pic->p[0].i_pixel_pitch != 0)
+    {
+        // Now overwrite width/height with a better guess as to actual layout info
+        vf_new->height = pic->p[0].i_lines;
+        vf_new->width = pic->p[0].i_pitch / pic->p[0].i_pixel_pitch;
+    }
+
+    if (
+        vf_new->width          != vf_old->width          ||
+        vf_new->height         != vf_old->height         ||
+        vf_new->crop.x         != vf_old->crop.x         ||
+        vf_new->crop.y         != vf_old->crop.y         ||
+        vf_new->crop.width     != vf_old->crop.width     ||
+        vf_new->crop.height    != vf_old->crop.height    ||
+        vf_new->par.num        != vf_old->par.num        ||
+        vf_new->par.den        != vf_old->par.den        ||
+        // Frame rate ignored
+        vf_new->color_space    != vf_old->color_space)
+    {
+#if 0
+        char dbuf0[5], dbuf1[5];
+        printf("%dx%d (%d,%d %dx%d) par:%d/%d %s -> %dx%d (%d,%d %dx%d) par:%d/%d %s\n",
+               vf_old->width          ,
+               vf_old->height         ,
+               vf_old->crop.x         ,
+               vf_old->crop.y         ,
+               vf_old->crop.width     ,
+               vf_old->crop.height    ,
+               vf_old->par.num        ,
+               vf_old->par.den        ,
+               str_fourcc(dbuf0, vf_old->color_space)    ,
+               vf_new->width          ,
+               vf_new->height         ,
+               vf_new->crop.x         ,
+               vf_new->crop.y         ,
+               vf_new->crop.width     ,
+               vf_new->crop.height    ,
+               vf_new->par.num        ,
+               vf_new->par.den        ,
+               str_fourcc(dbuf1, vf_new->color_space)    );
+#endif
+        *vf_old = *vf_new;
+        return true;
+    }
+    return false;
+}
+
+
+hw_mmal_port_pool_ref_t * hw_mmal_port_pool_ref_create(MMAL_PORT_T * const port,
+   const unsigned int headers, const uint32_t payload_size)
+{
+    hw_mmal_port_pool_ref_t * ppr = calloc(1, sizeof(hw_mmal_port_pool_ref_t));
+    if (ppr == NULL)
+        return NULL;
+
+    if ((ppr->pool = mmal_port_pool_create(port, headers, payload_size)) == NULL)
+        goto fail;
+
+    ppr->port = port;
+    atomic_store(&ppr->refs, 1);
+    return ppr;
+
+fail:
+    free(ppr);
+    return NULL;
+}
+
+static void do_detached(void *(*fn)(void *), void * v)
+{
+    pthread_t dothread;
+    pthread_create(&dothread, NULL, fn, v);
+    pthread_detach(dothread);
+}
+
+// Destroy a ppr - aranged s.t. it has the correct prototype for a pthread
+static void * kill_ppr(void * v)
+{
+    hw_mmal_port_pool_ref_t * const ppr = v;
+    if (ppr->port->is_enabled)
+        mmal_port_disable(ppr->port);  // Avoid annoyed messages from MMAL when we kill the pool
+    mmal_port_pool_destroy(ppr->port, ppr->pool);
+    free(ppr);
+    return NULL;
+}
+
+void hw_mmal_port_pool_ref_release(hw_mmal_port_pool_ref_t * const ppr, const bool in_cb)
+{
+    if (ppr == NULL)
+        return;
+    if (atomic_fetch_sub(&ppr->refs, 1) != 1)
+        return;
+    if (in_cb)
+        do_detached(kill_ppr, ppr);
+    else
+        kill_ppr(ppr);
+}
+
+// Put buffer in port if possible - if not then release to pool
+// Returns true if sent, false if recycled
+bool hw_mmal_port_pool_ref_recycle(hw_mmal_port_pool_ref_t * const ppr, MMAL_BUFFER_HEADER_T * const buf)
+{
+    mmal_buffer_header_reset(buf);
+    buf->user_data = NULL;
+
+    if (mmal_port_send_buffer(ppr->port, buf) == MMAL_SUCCESS)
+        return true;
+    mmal_buffer_header_release(buf);
+    return false;
+}
+
+MMAL_STATUS_T hw_mmal_port_pool_ref_fill(hw_mmal_port_pool_ref_t * const ppr)
+{
+    MMAL_BUFFER_HEADER_T * buf;
+    MMAL_STATUS_T err = MMAL_SUCCESS;
+
+    while ((buf = mmal_queue_get(ppr->pool->queue)) != NULL) {
+        if ((err = mmal_port_send_buffer(ppr->port, buf)) != MMAL_SUCCESS)
+        {
+            mmal_queue_put_back(ppr->pool->queue, buf);
+            break;
+        }
+    }
+    return err;
+}
+
+
+MMAL_STATUS_T hw_mmal_opaque_output(vlc_object_t * const obj,
+                                    hw_mmal_port_pool_ref_t ** pppr,
+                                    MMAL_PORT_T * const port,
+                                    const unsigned int extra_buffers, MMAL_PORT_BH_CB_T callback)
+{
+    MMAL_STATUS_T status;
+
+    port->userdata = (struct MMAL_PORT_USERDATA_T *)obj;
+
+    status = port_parameter_set_uint32(port, MMAL_PARAMETER_EXTRA_BUFFERS, extra_buffers);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(obj, "Failed to set MMAL_PARAMETER_EXTRA_BUFFERS on output port (status=%"PRIx32" %s)",
+                status, mmal_status_to_string(status));
+        return status;
+    }
+
+    status = port_parameter_set_bool(port, MMAL_PARAMETER_ZERO_COPY, 1);
+    if (status != MMAL_SUCCESS) {
+       msg_Err(obj, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
+                port->name, status, mmal_status_to_string(status));
+       return status;
+    }
+
+    port->format->encoding = MMAL_ENCODING_OPAQUE;
+    port->format->encoding_variant = 0;
+    if ((status = mmal_port_format_commit(port)) != MMAL_SUCCESS)
+    {
+        msg_Err(obj, "Failed to commit format on port %s (status=%"PRIx32" %s)",
+                 port->name, status, mmal_status_to_string(status));
+        return status;
+    }
+
+    port->buffer_num = 30;
+    port->buffer_size = port->buffer_size_recommended;
+
+    if ((*pppr = hw_mmal_port_pool_ref_create(port, port->buffer_num, port->buffer_size)) == NULL) {
+        msg_Err(obj, "Failed to create output pool");
+        return status;
+    }
+
+    status = mmal_port_enable(port, callback);
+    if (status != MMAL_SUCCESS) {
+        hw_mmal_port_pool_ref_release(*pppr, false);
+        *pppr = NULL;
+        msg_Err(obj, "Failed to enable output port %s (status=%"PRIx32" %s)",
+                port->name, status, mmal_status_to_string(status));
+        return status;
+    }
+
+    return MMAL_SUCCESS;
+}
+
+
+void hw_mmal_pic_ctx_destroy(picture_context_t * pic_ctx_cmn)
+{
+    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic_ctx_cmn;
+    unsigned int i;
+
+    for (i = 0; i != ctx->buf_count; ++i) {
+        if (ctx->bufs[i] != NULL)
+            mmal_buffer_header_release(ctx->bufs[i]);
+    }
+
+    cma_buf_end_flight(ctx->cb);
+    cma_buf_unref(ctx->cb);
+
+    free(ctx);
+}
+
+picture_context_t * hw_mmal_pic_ctx_copy(picture_context_t * pic_ctx_cmn)
+{
+    const pic_ctx_mmal_t * const src_ctx = (pic_ctx_mmal_t *)pic_ctx_cmn;
+    pic_ctx_mmal_t * const dst_ctx = calloc(1, sizeof(*dst_ctx));
+    unsigned int i;
+
+    if (dst_ctx == NULL)
+        return NULL;
+
+    // Copy
+    dst_ctx->cmn = src_ctx->cmn;
+
+    dst_ctx->cb = cma_buf_ref(src_ctx->cb);
+
+    dst_ctx->buf_count = src_ctx->buf_count;
+    for (i = 0; i != src_ctx->buf_count; ++i) {
+        dst_ctx->bufs[i] = src_ctx->bufs[i];
+        if (dst_ctx->bufs[i] != NULL)
+            mmal_buffer_header_acquire(dst_ctx->bufs[i]);
+    }
+
+    return &dst_ctx->cmn;
+}
+
+static MMAL_BOOL_T
+buf_pre_release_cb(MMAL_BUFFER_HEADER_T * buf, void *userdata)
+{
+    hw_mmal_port_pool_ref_t * const ppr = userdata;
+
+    // Kill the callback - otherwise we will go in circles!
+    mmal_buffer_header_pre_release_cb_set(buf, (MMAL_BH_PRE_RELEASE_CB_T)0, NULL);
+    mmal_buffer_header_acquire(buf);  // Ref it again
+
+    // As we have re-acquired the buffer we need a full release
+    // (not continue) to zap the ref count back to zero
+    // This is "safe" 'cos we have already reset the cb
+    hw_mmal_port_pool_ref_recycle(ppr, buf);
+    hw_mmal_port_pool_ref_release(ppr, true); // Assume in callback
+
+    return MMAL_TRUE;
+}
+
+// Buffer belongs to context on successful return from this fn
+// is still valid on failure
+picture_context_t *
+hw_mmal_gen_context(MMAL_BUFFER_HEADER_T * buf, hw_mmal_port_pool_ref_t * const ppr)
+{
+    pic_ctx_mmal_t * const ctx = calloc(1, sizeof(pic_ctx_mmal_t));
+
+    if (ctx == NULL)
+        return NULL;
+
+    // If we have an associated ppr then ref & set appropriate callbacks
+    if (ppr != NULL) {
+        hw_mmal_port_pool_ref_acquire(ppr);
+        mmal_buffer_header_pre_release_cb_set(buf, buf_pre_release_cb, ppr);
+        buf->user_data = NULL;
+    }
+
+    ctx->cmn.copy = hw_mmal_pic_ctx_copy;
+    ctx->cmn.destroy = hw_mmal_pic_ctx_destroy;
+
+    ctx->buf_count = 1;
+    ctx->bufs[0] = buf;
+
+    return &ctx->cmn;
+}
+
+// n is els
+// * Make NEON!
+typedef void piccpy_fn(void * dest, const void * src, size_t n);
+
+extern piccpy_fn mmal_piccpy_10_to_8_neon;
+
+static void piccpy_10_to_8_c(void * dest, const void * src, size_t n)
 {
-    picture_sys_t *pic_sys = picture->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer = pic_sys->buffer;
+    uint8_t * d = dest;
+    const uint16_t * s = src;
+    while (n-- != 0)
+        *d++ = *s++ >> 2;
+}
+
+// Do a stride converting copy - if the strides are the same and line_len is
+// close then do a single block copy - we don't expect to have to preserve
+// pixels in the output frame
+static void mem_copy_2d(uint8_t * d_ptr, const size_t d_stride,
+                        const uint8_t * s_ptr, const size_t s_stride,
+                        size_t lines, const size_t line_len)
+{
+    if (s_stride == d_stride && d_stride < line_len + 32)
+    {
+        memcpy(d_ptr, s_ptr, d_stride * lines);
+    }
+    else
+    {
+        while (lines-- != 0) {
+            memcpy(d_ptr, s_ptr, line_len);
+            d_ptr += d_stride;
+            s_ptr += s_stride;
+        }
+    }
+}
+
+// line_len in D units
+static void mem_copy_2d_10_to_8(uint8_t * d_ptr, const size_t d_stride,
+                        const uint8_t * s_ptr, const size_t s_stride,
+                        size_t lines, const size_t line_len)
+{
+    piccpy_fn * const docpy = vlc_CPU_ARM_NEON() ? mmal_piccpy_10_to_8_neon : piccpy_10_to_8_c;
+    if (s_stride == d_stride * 2 && d_stride < line_len + 32)
+    {
+        docpy(d_ptr, s_ptr, d_stride * lines);
+    }
+    else
+    {
+        while (lines-- != 0) {
+            docpy(d_ptr, s_ptr, line_len);
+            d_ptr += d_stride;
+            s_ptr += s_stride;
+        }
+    }
+}
+
+
+int hw_mmal_copy_pic_to_buf(void * const buf_data,
+                            uint32_t * const pLength,
+                            const MMAL_ES_FORMAT_T * const fmt,
+                            const picture_t * const pic)
+{
+    const MMAL_VIDEO_FORMAT_T *const video = &fmt->es->video;
+    uint8_t * const dest = buf_data;
+    size_t length = 0;
+
+    //**** Worry about x/y_offsets
 
-    int offset = 0;
-    picture->p[0].p_pixels = buffer->data;
-    for (int i = 1; i < picture->i_planes; i++) {
-        offset = offset + picture->p[i - 1].i_pitch * picture->p[i - 1].i_lines;
-        picture->p[i].p_pixels = (ptrdiff_t)buffer->data + offset;
+    assert(fmt->encoding == MMAL_ENCODING_I420);
+
+    switch (pic->format.i_chroma) {
+        case VLC_CODEC_I420:
+        {
+            const size_t y_size = video->width * video->height;
+            mem_copy_2d(dest, video->width,
+                 pic->p[0].p_pixels, pic->p[0].i_pitch,
+                 video->crop.height,
+                 video->crop.width);
+
+            mem_copy_2d(dest + y_size, video->width / 2,
+                 pic->p[1].p_pixels, pic->p[1].i_pitch,
+                 video->crop.height / 2,
+                 video->crop.width / 2);
+
+            mem_copy_2d(dest + y_size + y_size / 4, video->width / 2,
+                 pic->p[2].p_pixels, pic->p[2].i_pitch,
+                 video->crop.height / 2,
+                 video->crop.width / 2);
+
+            // And make sure it is actually in memory
+            length = y_size + y_size / 2;
+            break;
+        }
+
+        case VLC_CODEC_I420_10L:
+        {
+            const size_t y_size = video->width * video->height;
+            mem_copy_2d_10_to_8(dest, video->width,
+                 pic->p[0].p_pixels, pic->p[0].i_pitch,
+                 video->crop.height,
+                 video->crop.width);
+
+            mem_copy_2d_10_to_8(dest + y_size, video->width / 2,
+                 pic->p[1].p_pixels, pic->p[1].i_pitch,
+                 video->crop.height / 2,
+                 video->crop.width / 2);
+
+            mem_copy_2d_10_to_8(dest + y_size + y_size / 4, video->width / 2,
+                 pic->p[2].p_pixels, pic->p[2].i_pitch,
+                 video->crop.height / 2,
+                 video->crop.width / 2);
+
+            // And make sure it is actually in memory
+            length = y_size + y_size / 2;
+            break;
+        }
+
+        default:
+            if (pLength != NULL)
+                *pLength = 0;
+            return VLC_EBADVAR;
+    }
+
+    if (cma_vcsm_type() == VCSM_INIT_LEGACY) {  // ** CMA is currently always uncached
+        flush_range(dest, length);
     }
 
-    pic_sys->displayed = false;
+    if (pLength != NULL)
+        *pLength = (uint32_t)length;
 
     return VLC_SUCCESS;
 }
+
+
+static MMAL_BOOL_T rep_buf_free_cb(MMAL_BUFFER_HEADER_T *header, void *userdata)
+{
+    cma_buf_t * const cb = userdata;
+    VLC_UNUSED(header);
+
+    cma_buf_unref(cb);
+    return MMAL_FALSE;
+}
+
+static int cma_buf_buf_attach(MMAL_BUFFER_HEADER_T * const buf, cma_buf_t * const cb)
+{
+    // Just a CMA buffer - fill in new buffer
+    const uintptr_t vc_h = cma_buf_vc_handle(cb);
+    if (vc_h == 0)
+        return VLC_EGENERIC;
+
+    mmal_buffer_header_reset(buf);
+    buf->data       = (uint8_t *)vc_h;
+    buf->alloc_size = cma_buf_size(cb);
+    buf->length     = buf->alloc_size;
+    // Ensure cb remains valid for the duration of this buffer
+    mmal_buffer_header_pre_release_cb_set(buf, rep_buf_free_cb, cma_buf_ref(cb));
+    return VLC_SUCCESS;
+}
+
+MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_copied(const picture_t *const pic,
+                                              MMAL_POOL_T * const rep_pool,
+                                              MMAL_PORT_T * const port,
+                                              cma_buf_pool_t * const cbp)
+{
+    MMAL_BUFFER_HEADER_T *const buf = mmal_queue_wait(rep_pool->queue);
+    if (buf == NULL)
+        goto fail0;
+
+    cma_buf_t * const cb = cma_buf_pool_alloc_buf(cbp, port->buffer_size);
+    if (cb == NULL)
+        goto fail1;
+
+    if (cma_buf_buf_attach(buf, cb) != VLC_SUCCESS)
+        goto fail2;
+
+    pic_to_buf_copy_props(buf, pic);
+
+    if (hw_mmal_copy_pic_to_buf(cma_buf_addr(cb), &buf->length, port->format, pic) != VLC_SUCCESS)
+        goto fail2;
+    buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
+
+    cma_buf_unref(cb);
+    return buf;
+
+fail2:
+    cma_buf_unref(cb);
+fail1:
+    mmal_buffer_header_release(buf);
+fail0:
+    return NULL;
+}
+
+MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_replicated(const picture_t *const pic, MMAL_POOL_T * const rep_pool)
+{
+    pic_ctx_mmal_t *const ctx = (pic_ctx_mmal_t *)pic->context;
+    MMAL_BUFFER_HEADER_T *const rep_buf = mmal_queue_wait(rep_pool->queue);
+
+    if (rep_buf == NULL)
+        return NULL;
+
+    if (ctx->bufs[0] != NULL)
+    {
+        // Existing buffer - replicate it
+        if (mmal_buffer_header_replicate(rep_buf, ctx->bufs[0]) != MMAL_SUCCESS)
+            goto fail;
+    }
+    else if (ctx->cb != NULL)
+    {
+        // Just a CMA buffer - fill in new buffer
+        if (cma_buf_buf_attach(rep_buf, ctx->cb) != 0)
+            goto fail;
+    }
+    else
+        goto fail;
+
+    pic_to_buf_copy_props(rep_buf, pic);
+    return rep_buf;
+
+fail:
+    mmal_buffer_header_release(rep_buf);
+    return NULL;
+}
+
+
+
+
+int hw_mmal_get_gpu_mem(void) {
+    static int stashed_val = -2;
+    VCHI_INSTANCE_T vchi_instance;
+    VCHI_CONNECTION_T *vchi_connection = NULL;
+    char rbuf[1024] = { 0 };
+
+    if (stashed_val >= -1)
+        return stashed_val;
+
+    if (vchi_initialise(&vchi_instance) != 0)
+        goto fail0;
+
+    //create a vchi connection
+    if (vchi_connect(NULL, 0, vchi_instance) != 0)
+        goto fail0;
+
+    vc_vchi_gencmd_init(vchi_instance, &vchi_connection, 1);
+
+    //send the gencmd for the argument
+    if (vc_gencmd_send("get_mem gpu") != 0)
+        goto fail;
+
+    if (vc_gencmd_read_response(rbuf, sizeof(rbuf) - 1) != 0)
+        goto fail;
+
+    if (strncmp(rbuf, "gpu=", 4) != 0)
+        goto fail;
+
+    char *p;
+    unsigned long m = strtoul(rbuf + 4, &p, 10);
+
+    if (p[0] != 'M' || p[1] != '\0')
+        stashed_val = -1;
+    else
+        stashed_val = (int)m << 20;
+
+    vc_gencmd_stop();
+
+    //close the vchi connection
+    vchi_disconnect(vchi_instance);
+
+    return stashed_val;
+
+fail:
+    vc_gencmd_stop();
+    vchi_disconnect(vchi_instance);
+fail0:
+    stashed_val = -1;
+    return -1;
+};
+
+// ===========================================================================
+
+typedef struct pool_ent_s
+{
+    struct pool_ent_s * next;
+    struct pool_ent_s * prev;
+
+    atomic_int ref_count;
+    unsigned int seq;
+
+    size_t size;
+
+    int vcsm_hdl;
+    int vc_hdl;
+    void * buf;
+
+    unsigned int width;
+    unsigned int height;
+    MMAL_FOURCC_T enc_type;
+
+    picture_t * pic;
+} pool_ent_t;
+
+
+typedef struct ent_list_hdr_s
+{
+    pool_ent_t * ents;
+    pool_ent_t * tail;
+    unsigned int n;
+} ent_list_hdr_t;
+
+#define ENT_LIST_HDR_INIT (ent_list_hdr_t){ \
+   .ents = NULL, \
+   .tail = NULL, \
+   .n = 0 \
+}
+
+struct vzc_pool_ctl_s
+{
+    atomic_int ref_count;
+
+    ent_list_hdr_t ent_pool;
+    ent_list_hdr_t ents_cur;
+    ent_list_hdr_t ents_prev;
+
+    unsigned int max_n;
+    unsigned int seq;
+
+    vlc_mutex_t lock;
+
+    MMAL_POOL_T * buf_pool;
+
+    vcsm_init_type_t vcsm_init_type;
+};
+
+typedef struct vzc_subbuf_ent_s
+{
+    pool_ent_t * ent;
+    MMAL_RECT_T pic_rect;
+    MMAL_RECT_T orig_dest_rect;
+    MMAL_DISPLAYREGION_T dreg;
+} vzc_subbuf_ent_t;
+
+
+static pool_ent_t * ent_extract(ent_list_hdr_t * const elh, pool_ent_t * const ent)
+{
+//    printf("List %p [%d]: Ext %p\n", elh, elh->n, ent);
+
+    if (ent == NULL)
+        return NULL;
+
+    if (ent->next == NULL)
+        elh->tail = ent->prev;
+    else
+        ent->next->prev = ent->prev;
+
+    if (ent->prev == NULL)
+        elh->ents = ent->next;
+    else
+        ent->prev->next = ent->next;
+
+    ent->prev = ent->next = NULL;
+
+    --elh->n;
+
+    return ent;  // For convienience
+}
+
+static inline pool_ent_t * ent_extract_tail(ent_list_hdr_t * const elh)
+{
+    return ent_extract(elh, elh->tail);
+}
+
+static void ent_add_head(ent_list_hdr_t * const elh, pool_ent_t * const ent)
+{
+//    printf("List %p [%d]: Add %p\n", elh, elh->n, ent);
+
+    if ((ent->next = elh->ents) == NULL)
+        elh->tail = ent;
+    else
+        ent->next->prev = ent;
+
+    ent->prev = NULL;
+    elh->ents = ent;
+    ++elh->n;
+}
+
+static void ent_free(pool_ent_t * const ent)
+{
+//    printf("Free ent: %p\n", ent);
+    if (ent != NULL) {
+        // If we still have a ref to a pic - kill it now
+        if (ent->pic != NULL)
+            picture_Release(ent->pic);
+
+        // Free contents
+        vcsm_unlock_hdl(ent->vcsm_hdl);
+
+        vcsm_free(ent->vcsm_hdl);
+
+        free(ent);
+    }
+}
+
+static void ent_free_list(ent_list_hdr_t * const elh)
+{
+    pool_ent_t * ent = elh->ents;
+
+//    printf("Free list: %p [%d]\n", elh, elh->n);
+
+    *elh = ENT_LIST_HDR_INIT;
+
+    while (ent != NULL) {
+        pool_ent_t * const t = ent;
+        ent = t->next;
+        ent_free(t);
+    }
+}
+
+static void ent_list_move(ent_list_hdr_t * const dst, ent_list_hdr_t * const src)
+{
+//    printf("Move %p->%p\n", src, dst);
+
+    *dst = *src;
+    *src = ENT_LIST_HDR_INIT;
+}
+
+// Scans "backwards" as that should give us the fastest match if we are
+// presented with pics in the same order each time
+static pool_ent_t * ent_list_extract_pic_ent(ent_list_hdr_t * const elh, picture_t * const pic)
+{
+    pool_ent_t *ent = elh->tail;
+
+//    printf("Find list: %p [%d]; pic:%p\n", elh, elh->n, pic);
+
+    while (ent != NULL) {
+//        printf("Check ent: %p, pic:%p\n", ent, ent->pic);
+
+        if (ent->pic == pic)
+            return ent_extract(elh, ent);
+        ent = ent->prev;
+    }
+    return NULL;
+}
+
+#define POOL_ENT_ALLOC_BLOCK  0x10000
+
+static pool_ent_t * pool_ent_alloc_new(size_t req_size)
+{
+    pool_ent_t * ent = calloc(1, sizeof(*ent));
+    const size_t alloc_size = (req_size + POOL_ENT_ALLOC_BLOCK - 1) & ~(POOL_ENT_ALLOC_BLOCK - 1);
+
+    if (ent == NULL)
+        return NULL;
+
+    ent->next = ent->prev = NULL;
+
+    // Alloc from vcsm
+    if ((ent->vcsm_hdl = vcsm_malloc_cache(alloc_size, VCSM_CACHE_TYPE_HOST, (char *)"vlc-subpic")) == -1)
+        goto fail1;
+    if ((ent->vc_hdl = vcsm_vc_hdl_from_hdl(ent->vcsm_hdl)) == 0)
+        goto fail2;
+    if ((ent->buf = vcsm_lock(ent->vcsm_hdl)) == NULL)
+        goto fail2;
+
+    ent->size = alloc_size;
+    return ent;
+
+fail2:
+    vcsm_free(ent->vcsm_hdl);
+fail1:
+    free(ent);
+    return NULL;
+}
+
+static inline pool_ent_t * pool_ent_ref(pool_ent_t * const ent)
+{
+//    int n = atomic_fetch_add(&ent->ref_count, 1) + 1;
+//    printf("Ref: %p: %d\n", ent, n);
+    atomic_fetch_add(&ent->ref_count, 1);
+    return ent;
+}
+
+static void pool_recycle(vzc_pool_ctl_t * const pc, pool_ent_t * const ent)
+{
+    pool_ent_t * xs = NULL;
+    int n;
+
+    if (ent == NULL)
+        return;
+
+    n = atomic_fetch_sub(&ent->ref_count, 1) - 1;
+
+//    printf("%s: Pool: %p: Ent: %p: %d\n", __func__, &pc->ent_pool, ent, n);
+
+    if (n != 0)
+        return;
+
+    if (ent->pic != NULL) {
+        picture_Release(ent->pic);
+        ent->pic = NULL;
+    }
+
+    vlc_mutex_lock(&pc->lock);
+
+    // If we have a full pool then extract the LRU and free it
+    // Free done outside mutex
+    if (pc->ent_pool.n >= pc->max_n)
+        xs = ent_extract_tail(&pc->ent_pool);
+
+    ent_add_head(&pc->ent_pool, ent);
+
+    vlc_mutex_unlock(&pc->lock);
+
+    ent_free(xs);
+}
+
+// * This could be made more efficient, but this is easy
+static void pool_recycle_list(vzc_pool_ctl_t * const pc, ent_list_hdr_t * const elh)
+{
+    pool_ent_t * ent;
+    while ((ent = ent_extract_tail(elh)) != NULL) {
+        pool_recycle(pc, ent);
+    }
+}
+
+static pool_ent_t * pool_best_fit(vzc_pool_ctl_t * const pc, size_t req_size)
+{
+    pool_ent_t * best = NULL;
+
+    vlc_mutex_lock(&pc->lock);
+
+    {
+        pool_ent_t * ent = pc->ent_pool.ents;
+
+        // Simple scan
+        while (ent != NULL) {
+            if (ent->size >= req_size && ent->size <= req_size * 2 + POOL_ENT_ALLOC_BLOCK &&
+                    (best == NULL || best->size > ent->size))
+                best = ent;
+            ent = ent->next;
+        }
+
+        // extract best from chain if we've found it
+        ent_extract(&pc->ent_pool, best);
+    }
+
+    vlc_mutex_unlock(&pc->lock);
+
+    if (best == NULL)
+        best = pool_ent_alloc_new(req_size);
+
+    if ((best->seq = ++pc->seq) == 0)
+        best->seq = ++pc->seq;  // Never allow to be zero
+
+    atomic_store(&best->ref_count, 1);
+    return best;
+}
+
+
+const vlc_fourcc_t hw_mmal_vzc_subpicture_chromas[] = { VLC_CODEC_RGBA, VLC_CODEC_BGRA, VLC_CODEC_ARGB, 0 };
+
+void hw_mmal_vzc_buf_get_wh(MMAL_BUFFER_HEADER_T * const buf, int * const pW, int * const pH)
+{
+    const pool_ent_t *const ent = ((vzc_subbuf_ent_t *)buf->user_data)->ent;
+    *pW = ent->width;
+    *pH = ent->height;
+}
+
+bool hw_mmal_vzc_buf_set_format(MMAL_BUFFER_HEADER_T * const buf, MMAL_ES_FORMAT_T * const es_fmt)
+{
+    const pool_ent_t *const ent = ((vzc_subbuf_ent_t *)buf->user_data)->ent;
+    MMAL_VIDEO_FORMAT_T * const v_fmt = &es_fmt->es->video;
+
+    es_fmt->type = MMAL_ES_TYPE_VIDEO;
+    es_fmt->encoding = ent->enc_type;
+    es_fmt->encoding_variant = 0;
+
+    v_fmt->width = ent->width;
+    v_fmt->height = ent->height;
+    v_fmt->crop.x = 0;
+    v_fmt->crop.y = 0;
+    v_fmt->crop.width = ent->width;
+    v_fmt->crop.height = ent->height;
+
+    return true;
+}
+
+void hw_mmal_vzc_buf_frame_size(MMAL_BUFFER_HEADER_T * const buf,
+                                uint32_t * const pWidth, uint32_t * const pHeight)
+{
+    const pool_ent_t *const ent = ((vzc_subbuf_ent_t *)buf->user_data)->ent;
+    *pWidth = ent->width;
+    *pHeight = ent->height;
+}
+
+
+MMAL_DISPLAYREGION_T * hw_mmal_vzc_buf_region(MMAL_BUFFER_HEADER_T * const buf)
+{
+    vzc_subbuf_ent_t * sb = buf->user_data;
+    return &sb->dreg;
+}
+
+static inline int rescale_x(int x, int mul, int div)
+{
+    return div == 0 ? x * mul : (x * mul + div/2) / div;
+}
+
+static void rescale_rect(MMAL_RECT_T * const d, const MMAL_RECT_T * const s, const MMAL_RECT_T * mul_rect, const MMAL_RECT_T * div_rect)
+{
+    d->x      = rescale_x(s->x - div_rect->x, mul_rect->width,  div_rect->width)  + mul_rect->x;
+    d->y      = rescale_x(s->y - div_rect->y, mul_rect->height, div_rect->height) + mul_rect->y;
+    d->width  = rescale_x(s->width,           mul_rect->width,  div_rect->width);
+    d->height = rescale_x(s->height,          mul_rect->height, div_rect->height);
+#if 0
+    fprintf(stderr, "(%d,%d %dx%d) * (%d,%d %dx%d) / (%d,%d %dx%d) -> (%d,%d %dx%d)\n",
+            s->x, s->y, s->width, s->height,
+            mul_rect->x, mul_rect->y, mul_rect->width, mul_rect->height,
+            div_rect->x, div_rect->y, div_rect->width, div_rect->height,
+            d->x, d->y, d->width, d->height);
+#endif
+}
+
+void hw_mmal_vzc_buf_scale_dest_rect(MMAL_BUFFER_HEADER_T * const buf, const MMAL_RECT_T * const scale_rect)
+{
+    vzc_subbuf_ent_t * sb = buf->user_data;
+    if (scale_rect == NULL) {
+        sb->dreg.dest_rect = sb->orig_dest_rect;
+    }
+    else
+    {
+        rescale_rect(&sb->dreg.dest_rect, &sb->orig_dest_rect,
+                     scale_rect, &sb->pic_rect);
+    }
+}
+
+unsigned int hw_mmal_vzc_buf_seq(MMAL_BUFFER_HEADER_T * const buf)
+{
+    vzc_subbuf_ent_t * sb = buf->user_data;
+    return sb->ent->seq;
+}
+
+
+// The intent with the ents_cur & ents_last stuff is to remember the buffers
+// we used on the last frame and reuse them on the current one if they are the
+// same.  Unfortunately detection of "is_first" is only a heuristic (there are
+// no rules governing the order in which things are blended) so we must deal
+// (fairly) gracefully with it never (or always) being set.
+
+// dst_fmt gives the number space in which the destination pixels are specified
+
+MMAL_BUFFER_HEADER_T * hw_mmal_vzc_buf_from_pic(vzc_pool_ctl_t * const pc,
+                                                picture_t * const pic,
+                                                const MMAL_RECT_T dst_pic_rect,
+                                                const int x_offset, const int y_offset,
+                                                const unsigned int alpha,
+                                                const bool is_first)
+{
+    MMAL_BUFFER_HEADER_T * const buf = mmal_queue_get(pc->buf_pool->queue);
+    vzc_subbuf_ent_t * sb;
+
+    if (buf == NULL)
+        return NULL;
+
+    if ((sb = calloc(1, sizeof(*sb))) == NULL)
+        goto fail1;
+
+    // If first or we've had a lot of stuff move everything to the last list
+    // (we could deal more gracefully with the "too many" case but it shouldn't
+    // really happen)
+    if (is_first || pc->ents_cur.n >= CTX_BUFS_MAX) {
+        pool_recycle_list(pc, &pc->ents_prev);
+        ent_list_move(&pc->ents_prev, &pc->ents_cur);
+    }
+
+    sb->dreg.hdr.id = MMAL_PARAMETER_DISPLAYREGION;
+    sb->dreg.hdr.size = sizeof(sb->dreg);
+    buf->user_data = sb;
+
+    {
+        // ?? Round start offset as well as length
+        const video_format_t *const fmt = &pic->format;
+
+        const unsigned int bpp = (fmt->i_bits_per_pixel + 7) >> 3;
+        const unsigned int xl = (fmt->i_x_offset & ~15);
+        const unsigned int xr = (fmt->i_x_offset + fmt->i_visible_width + 15) & ~15;
+        const size_t dst_stride = (xr - xl) * bpp;
+        const size_t dst_lines = ((fmt->i_visible_height + 15) & ~15);
+        const size_t dst_size = dst_stride * dst_lines;
+
+        pool_ent_t * ent = ent_list_extract_pic_ent(&pc->ents_prev, pic);
+        bool needs_copy = false;
+
+        // If we didn't find ent in last then look in cur in case is_first
+        // isn't working
+        if (ent == NULL)
+            ent = ent_list_extract_pic_ent(&pc->ents_cur, pic);
+
+//        printf("ent_found: %p\n", ent);
+
+        if (ent == NULL)
+        {
+            // Need a new ent
+            needs_copy = true;
+
+            if ((ent = pool_best_fit(pc, dst_size)) == NULL)
+                goto fail2;
+            if ((ent->enc_type = vlc_to_mmal_video_fourcc(&pic->format)) == 0)
+                goto fail2;
+
+            ent->pic = picture_Hold(pic);
+        }
+
+        ent_add_head(&pc->ents_cur, ent);
+
+        sb->ent = pool_ent_ref(ent);
+        hw_mmal_vzc_pool_ref(pc);
+
+        // Copy data
+        buf->next = NULL;
+        buf->cmd = 0;
+        buf->data = (uint8_t *)(ent->vc_hdl);
+        buf->alloc_size = buf->length = dst_size;
+        buf->offset = 0;
+        buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
+        buf->pts = buf->dts = pic->date != VLC_TICK_INVALID ? pic->date : MMAL_TIME_UNKNOWN;
+        buf->type->video = (MMAL_BUFFER_HEADER_VIDEO_SPECIFIC_T){
+            .planes = 1,
+            .pitch = { dst_stride }
+        };
+
+        // Remember offsets
+        sb->dreg.set = MMAL_DISPLAY_SET_SRC_RECT |
+            MMAL_DISPLAY_SET_DEST_RECT |
+            MMAL_DISPLAY_SET_FULLSCREEN |
+            MMAL_DISPLAY_SET_ALPHA;
+
+        sb->dreg.fullscreen = 0;
+        // Will be set later - zero now to avoid any confusion
+        sb->dreg.dest_rect = (MMAL_RECT_T){0, 0, 0, 0};
+
+        sb->dreg.alpha = (uint32_t)(alpha & 0xff) | MMAL_DISPLAY_ALPHA_FLAGS_MIX;
+
+//        printf("+++ bpp:%d, vis:%dx%d wxh:%dx%d, d:%dx%d\n", bpp, fmt->i_visible_width, fmt->i_visible_height, fmt->i_width, fmt->i_height, dst_stride, dst_lines);
+
+        sb->dreg.src_rect = (MMAL_RECT_T){
+            .x      = (fmt->i_x_offset - xl),
+            .y      = 0,
+            .width  = fmt->i_visible_width,
+            .height = fmt->i_visible_height
+        };
+
+        sb->pic_rect = dst_pic_rect;
+
+        sb->orig_dest_rect = (MMAL_RECT_T){
+            .x      = x_offset,
+            .y      = y_offset,
+            .width  = fmt->i_visible_width,
+            .height = fmt->i_visible_height
+        };
+
+        if (needs_copy)
+        {
+            ent->width = dst_stride / bpp;
+            ent->height = dst_lines;
+
+            // 2D copy
+            {
+                uint8_t *d = ent->buf;
+                const uint8_t *s = pic->p[0].p_pixels + xl * bpp + fmt->i_y_offset * pic->p[0].i_pitch;
+
+                mem_copy_2d(d, dst_stride, s, pic->p[0].i_pitch, fmt->i_visible_height, dst_stride);
+
+                // And make sure it is actually in memory
+                if (pc->vcsm_init_type != VCSM_INIT_CMA) {  // ** CMA is currently always uncached
+                    flush_range(ent->buf, dst_stride * fmt->i_visible_height);
+                }
+            }
+        }
+    }
+
+    return buf;
+
+fail2:
+    free(sb);
+fail1:
+    mmal_buffer_header_release(buf);
+    return NULL;
+}
+
+void hw_mmal_vzc_pool_flush(vzc_pool_ctl_t * const pc)
+{
+    pool_recycle_list(pc, &pc->ents_prev);
+    pool_recycle_list(pc, &pc->ents_cur);
+}
+
+static void hw_mmal_vzc_pool_delete(vzc_pool_ctl_t * const pc)
+{
+
+//    printf("<<< %s\n", __func__);
+
+    hw_mmal_vzc_pool_flush(pc);
+
+    ent_free_list(&pc->ent_pool);
+
+    if (pc->buf_pool != NULL)
+        mmal_pool_destroy(pc->buf_pool);
+
+    vlc_mutex_destroy(&pc->lock);
+
+    cma_vcsm_exit(pc->vcsm_init_type);
+
+//    memset(pc, 0xba, sizeof(*pc)); // Zap for (hopefully) faster crash
+    free (pc);
+
+    //    printf(">>> %s\n", __func__);
+}
+
+void hw_mmal_vzc_pool_release(vzc_pool_ctl_t * const pc)
+{
+    int n;
+
+    if (pc == NULL)
+        return;
+
+    n = atomic_fetch_sub(&pc->ref_count, 1) - 1;
+
+    if (n != 0)
+        return;
+
+    hw_mmal_vzc_pool_delete(pc);
+}
+
+void hw_mmal_vzc_pool_ref(vzc_pool_ctl_t * const pc)
+{
+    atomic_fetch_add(&pc->ref_count, 1);
+}
+
+static MMAL_BOOL_T vcz_pool_release_cb(MMAL_POOL_T * buf_pool, MMAL_BUFFER_HEADER_T *buf, void *userdata)
+{
+    vzc_pool_ctl_t * const pc = userdata;
+    vzc_subbuf_ent_t * const sb = buf->user_data;
+
+    VLC_UNUSED(buf_pool);
+
+//    printf("<<< %s\n", __func__);
+
+    if (sb != NULL) {
+        buf->user_data = NULL;
+        pool_recycle(pc, sb->ent);
+        hw_mmal_vzc_pool_release(pc);
+        free(sb);
+    }
+
+//    printf(">>> %s\n", __func__);
+
+    return MMAL_TRUE;
+}
+
+vzc_pool_ctl_t * hw_mmal_vzc_pool_new()
+{
+    vzc_pool_ctl_t * const pc = calloc(1, sizeof(*pc));
+
+    if (pc == NULL)
+        return NULL;
+
+    if ((pc->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE)
+    {
+        free(pc);
+        return NULL;
+    }
+
+    pc->max_n = 8;
+    vlc_mutex_init(&pc->lock);  // Must init before potential destruction
+
+    if ((pc->buf_pool = mmal_pool_create(64, 0)) == NULL)
+    {
+        hw_mmal_vzc_pool_delete(pc);
+        return NULL;
+    }
+
+    atomic_store(&pc->ref_count, 1);
+
+    mmal_pool_callback_set(pc->buf_pool, vcz_pool_release_cb, pc);
+
+    return pc;
+}
+
+//----------------------------------------------------------------------------
+
+
+static const uint8_t shift_00[] = {0,0,0,0};
+static const uint8_t shift_01[] = {0,1,1,1};
+
+int cma_pic_set_data(picture_t * const pic,
+                            const MMAL_ES_FORMAT_T * const mm_esfmt,
+                            const MMAL_BUFFER_HEADER_T * const buf)
+{
+    const MMAL_VIDEO_FORMAT_T * const mm_fmt = &mm_esfmt->es->video;
+    const MMAL_BUFFER_HEADER_VIDEO_SPECIFIC_T *const buf_vid = (buf == NULL) ? NULL : &buf->type->video;
+    cma_buf_t *const cb = cma_buf_pic_get(pic);
+    unsigned int planes = 1;
+
+    uint8_t * const data = cma_buf_addr(cb);
+    if (data == NULL) {
+        return VLC_ENOMEM;
+    }
+
+    const uint8_t * ws = shift_00;
+    const uint8_t * hs = shift_00;
+    int pb = 1;
+
+    switch (mm_esfmt->encoding)
+    {
+        case MMAL_ENCODING_ARGB:
+        case MMAL_ENCODING_ABGR:
+        case MMAL_ENCODING_RGBA:
+        case MMAL_ENCODING_BGRA:
+        case MMAL_ENCODING_RGB32:
+        case MMAL_ENCODING_BGR32:
+            pb = 4;
+            break;
+        case MMAL_ENCODING_RGB16:
+            pb = 2;
+            break;
+
+        case MMAL_ENCODING_I420:
+            ws = shift_01;
+            hs = shift_01;
+            planes = 3;
+            break;
+
+        case MMAL_ENCODING_YUVUV128:
+            hs = shift_01;
+            planes = 2;
+            break;
+
+        default:
+//            msg_Err(p_filter, "%s: Unexpected format", __func__);
+            return VLC_EGENERIC;
+    }
+
+    // Fix up SAR if unset
+    if (pic->format.i_sar_den == 0 || pic->format.i_sar_num == 0) {
+        pic->format.i_sar_den = mm_fmt->par.den;
+        pic->format.i_sar_num = mm_fmt->par.num;
+    }
+
+    pic->i_planes = planes;
+    unsigned int offset = 0;
+    for (unsigned int i = 0; i != planes; ++i) {
+        pic->p[i] = (plane_t){
+            .p_pixels = data + (buf_vid != NULL ? buf_vid->offset[i] : offset),
+            .i_lines = mm_fmt->height >> hs[i],
+            .i_pitch = buf_vid != NULL ? buf_vid->pitch[i] : mm_fmt->width * pb,
+            .i_pixel_pitch = pb,
+            .i_visible_lines = mm_fmt->crop.height >> hs[i],
+            .i_visible_pitch = mm_fmt->crop.width >> ws[i]
+        };
+        offset += pic->p[i].i_pitch * pic->p[i].i_lines;
+    }
+    return VLC_SUCCESS;
+}
+
+int cma_buf_pic_attach(cma_buf_t * const cb, picture_t * const pic)
+{
+    if (!is_cma_buf_pic_chroma(pic->format.i_chroma))
+        return VLC_EGENERIC;
+    if (pic->context != NULL)
+        return VLC_EBADVAR;
+
+    pic_ctx_mmal_t * const ctx = calloc(1, sizeof(pic_ctx_mmal_t));
+
+    if (ctx == NULL)
+        return VLC_ENOMEM;
+
+    ctx->cmn.copy = hw_mmal_pic_ctx_copy;
+    ctx->cmn.destroy = hw_mmal_pic_ctx_destroy;
+    ctx->buf_count = 1; // cb takes the place of the 1st buf
+    ctx->cb = cb;
+
+    cma_buf_in_flight(cb);
+
+    pic->context = &ctx->cmn;
+    return VLC_SUCCESS;
+}
+
+cma_buf_t * cma_buf_pic_get(picture_t * const pic)
+{
+    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic->context;
+    return !is_cma_buf_pic_chroma(pic->format.i_chroma) || ctx  == NULL ? 0 : ctx->cb;
+}
+
+
+//----------------------------------------------------------------------------
+
+/* Returns the type of the Pi being used
+*/
+bool rpi_is_model_pi4(void) {
+    return bcm_host_is_model_pi4();
+}
+
+// Preferred mode - none->cma on Pi4 otherwise legacy
+static volatile vcsm_init_type_t last_vcsm_type = VCSM_INIT_NONE;
+
+vcsm_init_type_t cma_vcsm_type(void)
+{
+    return last_vcsm_type;
+}
+
+vcsm_init_type_t cma_vcsm_init(void)
+{
+    vcsm_init_type_t rv = VCSM_INIT_NONE;
+    // We don't bother locking - taking a copy here should be good enough
+    vcsm_init_type_t try_type = last_vcsm_type;
+
+    if (try_type == VCSM_INIT_NONE) {
+        if (bcm_host_is_fkms_active())
+            try_type = VCSM_INIT_CMA;
+        else
+            try_type = VCSM_INIT_LEGACY;
+    }
+
+    if (try_type == VCSM_INIT_CMA) {
+        if (vcsm_init_ex(1, -1) == 0)
+            rv = VCSM_INIT_CMA;
+        else if (vcsm_init_ex(0, -1) == 0)
+            rv = VCSM_INIT_LEGACY;
+    }
+    else
+    {
+        if (vcsm_init_ex(0, -1) == 0)
+            rv = VCSM_INIT_LEGACY;
+        else if (vcsm_init_ex(1, -1) == 0)
+            rv = VCSM_INIT_CMA;
+    }
+
+    // Just in case this affects vcsm init do after that
+    if (rv != VCSM_INIT_NONE)
+        bcm_host_init();
+
+    last_vcsm_type = rv;
+    return rv;
+}
+
+void cma_vcsm_exit(const vcsm_init_type_t init_mode)
+{
+    if (init_mode != VCSM_INIT_NONE)
+    {
+        vcsm_exit();
+        bcm_host_deinit();  // Does nothing but add in case it ever does
+    }
+}
+
+const char * cma_vcsm_init_str(const vcsm_init_type_t init_mode)
+{
+    switch (init_mode)
+    {
+        case VCSM_INIT_CMA:
+            return "CMA";
+        case VCSM_INIT_LEGACY:
+            return "Legacy";
+        case VCSM_INIT_NONE:
+            return "none";
+        default:
+            break;
+    }
+    return "???";
+}
+
+
--- a/modules/hw/mmal/mmal_picture.h
+++ b/modules/hw/mmal/mmal_picture.h
@@ -24,19 +24,298 @@
 #ifndef VLC_MMAL_MMAL_PICTURE_H_
 #define VLC_MMAL_MMAL_PICTURE_H_
 
+#include <stdatomic.h>
+
 #include <vlc_common.h>
 #include <interface/mmal/mmal.h>
 
+#include "mmal_cma.h"
+
 /* Think twice before changing this. Incorrect values cause havoc. */
 #define NUM_ACTUAL_OPAQUE_BUFFERS 30
 
-struct picture_sys_t {
-    vlc_object_t *owner;
+#ifndef VLC_TICK_INVALID
+#define VLC_TICK_INVALID VLC_TS_INVALID
+#define VLC_VER_3 1
+#else
+#define VLC_VER_3 0
+#endif
+
+typedef struct mmal_port_pool_ref_s
+{
+    atomic_uint refs;
+    MMAL_POOL_T * pool;
+    MMAL_PORT_T * port;
+} hw_mmal_port_pool_ref_t;
+
+typedef struct pic_ctx_subpic_s {
+    picture_t * subpic;
+    int x, y;
+    int alpha;
+} pic_ctx_subpic_t;
+
+
+#define CTX_BUFS_MAX 4
+typedef struct pic_ctx_mmal_s {
+    picture_context_t cmn;  // PARENT: Common els at start
+
+    cma_buf_t * cb;
+
+    unsigned int buf_count;
+    MMAL_BUFFER_HEADER_T * bufs[CTX_BUFS_MAX];
+
+} pic_ctx_mmal_t;
+
+const char * str_fourcc(char * const buf, const unsigned int fcc);
+
+MMAL_FOURCC_T vlc_to_mmal_video_fourcc(const video_frame_format_t * const vf_vlc);
+MMAL_FOURCC_T vlc_to_mmal_color_space(const video_color_space_t vlc_cs);
+void hw_mmal_vlc_fmt_to_mmal_fmt(MMAL_ES_FORMAT_T *const es_fmt, const video_frame_format_t * const vf_vlc);
+// Returns true if fmt_changed
+// frame_rate ignored for compare, but is set if something else is updated
+bool hw_mmal_vlc_pic_to_mmal_fmt_update(MMAL_ES_FORMAT_T *const es_fmt, const picture_t * const pic);
+
+// Copy pic contents into an existing buffer
+int hw_mmal_copy_pic_to_buf(void * const buf_data, uint32_t * const pLength,
+                            const MMAL_ES_FORMAT_T * const fmt, const picture_t * const pic);
+
+hw_mmal_port_pool_ref_t * hw_mmal_port_pool_ref_create(MMAL_PORT_T * const port,
+   const unsigned int headers, const uint32_t payload_size);
+void hw_mmal_port_pool_ref_release(hw_mmal_port_pool_ref_t * const ppr, const bool in_cb);
+bool hw_mmal_port_pool_ref_recycle(hw_mmal_port_pool_ref_t * const ppr, MMAL_BUFFER_HEADER_T * const buf);
+MMAL_STATUS_T hw_mmal_port_pool_ref_fill(hw_mmal_port_pool_ref_t * const ppr);
+static inline void hw_mmal_port_pool_ref_acquire(hw_mmal_port_pool_ref_t * const ppr)
+{
+    atomic_fetch_add(&ppr->refs, 1);
+}
+MMAL_STATUS_T hw_mmal_opaque_output(vlc_object_t * const obj,
+                                    hw_mmal_port_pool_ref_t ** pppr,
+                                    MMAL_PORT_T * const port,
+                                    const unsigned int extra_buffers, MMAL_PORT_BH_CB_T callback);
+
+static inline int hw_mmal_pic_has_sub_bufs(picture_t * const pic)
+{
+    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic->context;
+    return ctx->buf_count > 1;
+}
+
+static inline void hw_mmal_pic_sub_buf_add(picture_t * const pic, MMAL_BUFFER_HEADER_T * const sub)
+{
+    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic->context;
+
+    if (ctx->buf_count >= CTX_BUFS_MAX) {
+        mmal_buffer_header_release(sub);
+        return;
+    }
+
+    ctx->bufs[ctx->buf_count++] = sub;
+}
+
+static inline MMAL_BUFFER_HEADER_T * hw_mmal_pic_sub_buf_get(picture_t * const pic, const unsigned int n)
+{
+    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic->context;
+
+    return n + 1 > ctx->buf_count ? NULL : ctx->bufs[n + 1];
+}
+
+static inline bool hw_mmal_chroma_is_mmal(const vlc_fourcc_t chroma)
+{
+    return
+        chroma == VLC_CODEC_MMAL_OPAQUE ||
+        chroma == VLC_CODEC_MMAL_ZC_SAND8 ||
+        chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
+        chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
+        chroma == VLC_CODEC_MMAL_ZC_I420 ||
+        chroma == VLC_CODEC_MMAL_ZC_RGB32;
+}
+
+static inline bool hw_mmal_pic_is_mmal(const picture_t * const pic)
+{
+    return hw_mmal_chroma_is_mmal(pic->format.i_chroma);
+}
+
+picture_context_t * hw_mmal_pic_ctx_copy(picture_context_t * pic_ctx_cmn);
+void hw_mmal_pic_ctx_destroy(picture_context_t * pic_ctx_cmn);
+picture_context_t * hw_mmal_gen_context(
+    MMAL_BUFFER_HEADER_T * buf, hw_mmal_port_pool_ref_t * const ppr);
+
+int hw_mmal_get_gpu_mem(void);
+
+
+static inline MMAL_STATUS_T port_parameter_set_uint32(MMAL_PORT_T * port, uint32_t id, uint32_t val)
+{
+    const MMAL_PARAMETER_UINT32_T param = {
+        .hdr = {.id = id, .size = sizeof(MMAL_PARAMETER_UINT32_T)},
+        .value = val
+    };
+    return mmal_port_parameter_set(port, &param.hdr);
+}
+
+static inline MMAL_STATUS_T port_parameter_set_bool(MMAL_PORT_T * const port, const uint32_t id, const bool val)
+{
+    const MMAL_PARAMETER_BOOLEAN_T param = {
+        .hdr = {.id = id, .size = sizeof(MMAL_PARAMETER_BOOLEAN_T)},
+        .enable = val
+    };
+    return mmal_port_parameter_set(port, &param.hdr);
+}
+
+static inline MMAL_STATUS_T port_send_replicated(MMAL_PORT_T * const port, MMAL_POOL_T * const rep_pool,
+                                          MMAL_BUFFER_HEADER_T * const src_buf,
+                                          const uint64_t seq)
+{
+    MMAL_STATUS_T err;
+    MMAL_BUFFER_HEADER_T *const rep_buf = mmal_queue_wait(rep_pool->queue);
+
+    if (rep_buf == NULL)
+        return MMAL_ENOSPC;
+
+    if ((err = mmal_buffer_header_replicate(rep_buf, src_buf)) != MMAL_SUCCESS)
+        return err;
+
+    rep_buf->pts = seq;
+
+    if ((err = mmal_port_send_buffer(port, rep_buf)) != MMAL_SUCCESS)
+    {
+        mmal_buffer_header_release(rep_buf);
+        return err;
+    }
+
+    return MMAL_SUCCESS;
+}
+
+
+static inline void pic_to_buf_copy_props(MMAL_BUFFER_HEADER_T * const buf, const picture_t * const pic)
+{
+    if (!pic->b_progressive)
+    {
+        buf->flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
+        buf->type->video.flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
+    }
+    else
+    {
+        buf->flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
+        buf->type->video.flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
+    }
+    if (pic->b_top_field_first)
+    {
+        buf->flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
+        buf->type->video.flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
+    }
+    else
+    {
+        buf->flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
+        buf->type->video.flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
+    }
+    buf->pts = pic->date != VLC_TICK_INVALID ? pic->date : MMAL_TIME_UNKNOWN;
+    buf->dts = buf->pts;
+}
+
+static inline void buf_to_pic_copy_props(picture_t * const pic, const MMAL_BUFFER_HEADER_T * const buf)
+{
+    // Contrary to docn the interlace & tff flags turn up in the header flags rather than the
+    // video specific flags (which appear to be currently unused).
+    pic->b_progressive = (buf->flags & MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED) == 0;
+    pic->b_top_field_first = (buf->flags & MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST) != 0;
+
+    pic->date = buf->pts != MMAL_TIME_UNKNOWN ? buf->pts :
+        buf->dts != MMAL_TIME_UNKNOWN ? buf->dts :
+            VLC_TICK_INVALID;
+}
+
+MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_copied(const picture_t *const pic,
+                                              MMAL_POOL_T * const rep_pool,
+                                              MMAL_PORT_T * const port,
+                                              cma_buf_pool_t * const cbp);
+
+MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_replicated(const picture_t *const pic, MMAL_POOL_T * const rep_pool);
+
+struct vzc_pool_ctl_s;
+typedef struct vzc_pool_ctl_s vzc_pool_ctl_t;
+
+// At the moment we cope with any mono-planar RGBA thing
+// We could cope with many other things but they currently don't occur
+extern const vlc_fourcc_t hw_mmal_vzc_subpicture_chromas[];
+static inline bool hw_mmal_vzc_subpic_fmt_valid(const video_frame_format_t * const vf_vlc)
+{
+    const vlc_fourcc_t vfcc_src = vf_vlc->i_chroma;
+    for (const vlc_fourcc_t * p = hw_mmal_vzc_subpicture_chromas; *p != 0; ++p)
+        if (*p == vfcc_src)
+            return true;
+
+    return false;
+}
+
+bool hw_mmal_vzc_buf_set_format(MMAL_BUFFER_HEADER_T * const buf, MMAL_ES_FORMAT_T * const es_fmt);
+MMAL_DISPLAYREGION_T * hw_mmal_vzc_buf_region(MMAL_BUFFER_HEADER_T * const buf);
+void hw_mmal_vzc_buf_scale_dest_rect(MMAL_BUFFER_HEADER_T * const buf, const MMAL_RECT_T * const scale_rect);
+void hw_mmal_vzc_buf_get_wh(MMAL_BUFFER_HEADER_T * const buf, int * const pW, int * const pH);
+unsigned int hw_mmal_vzc_buf_seq(MMAL_BUFFER_HEADER_T * const buf);
+MMAL_BUFFER_HEADER_T * hw_mmal_vzc_buf_from_pic(vzc_pool_ctl_t * const pc, picture_t * const pic,
+                                                const MMAL_RECT_T dst_pic_rect,
+                                                const int x_offset, const int y_offset,
+                                                const unsigned int alpha, const bool is_first);
+void hw_mmal_vzc_buf_frame_size(MMAL_BUFFER_HEADER_T * const buf,
+                                uint32_t * const pWidth, uint32_t * const pHeight);
+
+void hw_mmal_vzc_pool_flush(vzc_pool_ctl_t * const pc);
+void hw_mmal_vzc_pool_release(vzc_pool_ctl_t * const pc);
+void hw_mmal_vzc_pool_ref(vzc_pool_ctl_t * const pc);
+vzc_pool_ctl_t * hw_mmal_vzc_pool_new(void);
+
+
+static inline MMAL_RECT_T vis_mmal_rect(const video_format_t * const fmt)
+{
+    return (MMAL_RECT_T){
+        .x      = fmt->i_x_offset,
+        .y      = fmt->i_y_offset,
+        .width  = fmt->i_visible_width,
+        .height = fmt->i_visible_height
+    };
+}
+
+int cma_pic_set_data(picture_t * const pic,
+                    const MMAL_ES_FORMAT_T * const mm_esfmt,
+                    const MMAL_BUFFER_HEADER_T * const buf);
+
+// Attaches cma buf to pic
+// Marks in_flight if not all_in_flight anyway
+int cma_buf_pic_attach(cma_buf_t * const cb, picture_t * const pic);
+// Returns a pointer to the cma_buf attached to the pic
+// Just a pointer - doesn't add a ref
+cma_buf_t * cma_buf_pic_get(picture_t * const pic);
+
+static inline bool is_cma_buf_pic_chroma(const uint32_t chroma)
+{
+    return chroma == VLC_CODEC_MMAL_ZC_RGB32 ||
+        chroma == VLC_CODEC_MMAL_ZC_SAND8 ||
+        chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
+        chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
+        chroma == VLC_CODEC_MMAL_ZC_I420;
+}
+
+
+int rpi_get_model_type(void);
+bool rpi_is_model_pi4(void);
+bool rpi_is_fkms_active(void);
+
+typedef enum vcsm_init_type_e {
+    VCSM_INIT_NONE = 0,
+    VCSM_INIT_LEGACY,
+    VCSM_INIT_CMA
+} vcsm_init_type_t;
+
+vcsm_init_type_t cma_vcsm_init(void);
+void cma_vcsm_exit(const vcsm_init_type_t init_mode);
+vcsm_init_type_t cma_vcsm_type(void);
+const char * cma_vcsm_init_str(const vcsm_init_type_t init_mode);
+
 
-    MMAL_BUFFER_HEADER_T *buffer;
-    bool displayed;
-};
+#define VOUT_DISPLAY_CHANGE_MMAL_BASE 1024
+#define VOUT_DISPLAY_CHANGE_MMAL_HIDE (VOUT_DISPLAY_CHANGE_MMAL_BASE + 0)
 
-int mmal_picture_lock(picture_t *picture);
+#define MMAL_COMPONENT_DEFAULT_RESIZER "vc.ril.resize"
+#define MMAL_COMPONENT_ISP_RESIZER     "vc.ril.isp"
+#define MMAL_COMPONENT_HVS             "vc.ril.hvs"
 
 #endif
--- /dev/null
+++ b/modules/hw/mmal/rpi_prof.h
@@ -0,0 +1,110 @@
+#ifndef RPI_PROFILE_H
+#define RPI_PROFILE_H
+
+#include <stdint.h>
+#include <inttypes.h>
+
+#ifndef RPI_PROFILE
+#define RPI_PROFILE 0
+#endif
+
+#if RPI_PROFILE
+
+#include "v7_pmu.h"
+
+#ifdef RPI_PROC_ALLOC
+#define X volatile
+#define Z =0
+#else
+#define X extern volatile
+#define Z
+#endif
+
+X uint64_t av_rpi_prof0_cycles Z;
+X unsigned int av_rpi_prof0_cnt Z;
+#define RPI_prof0_MAX_DURATION 100000
+
+X uint64_t av_rpi_prof1_cycles Z;
+X unsigned int av_rpi_prof1_cnt Z;
+#define RPI_prof1_MAX_DURATION 100000
+
+X uint64_t av_rpi_prof2_cycles Z;
+X unsigned int av_rpi_prof2_cnt Z;
+#define RPI_prof2_MAX_DURATION 10000
+
+X uint64_t av_rpi_prof_n_cycles[128];
+X unsigned int av_rpi_prof_n_cnt[128];
+#define RPI_prof_n_MAX_DURATION 10000
+
+
+#undef X
+#undef Z
+
+#define PROFILE_INIT()\
+do {\
+    enable_pmu();\
+    enable_ccnt();\
+} while (0)
+
+#define PROFILE_START()\
+do {\
+    volatile uint32_t perf_1 = read_ccnt();\
+    volatile uint32_t perf_2
+
+
+#define PROFILE_ACC(x)\
+    perf_2 = read_ccnt();\
+    {\
+        const uint32_t duration = perf_2 - perf_1;\
+        if (duration < RPI_##x##_MAX_DURATION)\
+        {\
+            av_rpi_##x##_cycles += duration;\
+            av_rpi_##x##_cnt += 1;\
+        }\
+    }\
+} while(0)
+
+
+#define PROFILE_ACC_N(n)\
+    if ((n) >= 0) {\
+        perf_2 = read_ccnt();\
+        {\
+            const uint32_t duration = perf_2 - perf_1;\
+            if (duration < RPI_prof_n_MAX_DURATION)\
+            {\
+                av_rpi_prof_n_cycles[n] += duration;\
+                av_rpi_prof_n_cnt[n] += 1;\
+            }\
+        }\
+    }\
+} while(0)
+
+#define PROFILE_PRINTF(x)\
+    printf("%-20s cycles=%14" PRIu64 ";  cnt=%8u;  avg=%5" PRIu64 "\n", #x, av_rpi_##x##_cycles, av_rpi_##x##_cnt,\
+        av_rpi_##x##_cnt == 0 ? (uint64_t)0 : av_rpi_##x##_cycles / (uint64_t)av_rpi_##x##_cnt)
+
+#define PROFILE_PRINTF_N(n)\
+    printf("prof[%d] cycles=%14" PRIu64 ";  cnt=%8u;  avg=%5" PRIu64 "\n", (n), av_rpi_prof_n_cycles[n], av_rpi_prof_n_cnt[n],\
+        av_rpi_prof_n_cnt[n] == 0 ? (uint64_t)0 : av_rpi_prof_n_cycles[n] / (uint64_t)av_rpi_prof_n_cnt[n])
+
+#define PROFILE_CLEAR_N(n) \
+do {\
+    av_rpi_prof_n_cycles[n] = 0;\
+    av_rpi_prof_n_cnt[n] = 0;\
+} while(0)
+
+#else
+
+// No profile
+#define PROFILE_INIT()
+#define PROFILE_START()
+#define PROFILE_ACC(x)
+#define PROFILE_ACC_N(x)
+#define PROFILE_PRINTF(x)
+#define PROFILE_PRINTF_N(x)
+#define PROFILE_CLEAR_N(n)
+
+#endif
+
+#endif
+
--- /dev/null
+++ b/modules/hw/mmal/subpic.c
@@ -0,0 +1,234 @@
+/*****************************************************************************
+ * mmal.c: MMAL-based decoder plugin for Raspberry Pi
+ *****************************************************************************
+ * Authors: jc@kynesim.co.uk
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdatomic.h>
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_codec.h>
+#include <vlc_filter.h>
+#include <vlc_threads.h>
+
+#include <bcm_host.h>
+#include <interface/mmal/mmal.h>
+#include <interface/mmal/util/mmal_util.h>
+#include <interface/mmal/util/mmal_default_components.h>
+
+#include "mmal_picture.h"
+#include "subpic.h"
+
+
+#define TRACE_ALL 0
+
+static inline bool cmp_rect(const MMAL_RECT_T * const a, const MMAL_RECT_T * const b)
+{
+    return a->x == b->x && a->y == b->y && a->width == b->width && a->height == b->height;
+}
+
+void hw_mmal_subpic_flush(vlc_object_t * const p_filter, subpic_reg_stash_t * const sub)
+{
+    VLC_UNUSED(p_filter);
+    if (sub->port != NULL && sub->port->is_enabled)
+        mmal_port_disable(sub->port);
+    sub->seq = 0;
+}
+
+void hw_mmal_subpic_close(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe)
+{
+    hw_mmal_subpic_flush(p_filter, spe);
+
+    if (spe->pool != NULL)
+        mmal_pool_destroy(spe->pool);
+
+    // Zap to avoid any accidental reuse
+    *spe = (subpic_reg_stash_t){NULL};
+}
+
+MMAL_STATUS_T hw_mmal_subpic_open(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe, MMAL_PORT_T * const port,
+                                  const int display_id, const unsigned int layer)
+{
+    MMAL_STATUS_T err;
+
+    // Start by zapping all to zero
+    *spe = (subpic_reg_stash_t){NULL};
+
+    if ((err = port_parameter_set_bool(port, MMAL_PARAMETER_ZERO_COPY, true)) != MMAL_SUCCESS)
+    {
+        msg_Err(p_filter, "Failed to set sub port zero copy");
+        return err;
+    }
+
+    if ((spe->pool = mmal_pool_create(30, 0)) == NULL)
+    {
+        msg_Err(p_filter, "Failed to create sub pool");
+        return MMAL_ENOMEM;
+    }
+
+    port->userdata = (void *)p_filter;
+    spe->port = port;
+    spe->display_id = display_id;
+    spe->layer = layer;
+
+    return MMAL_SUCCESS;
+}
+
+static void conv_subpic_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+{
+#if TRACE_ALL
+    msg_Dbg((filter_t *)port->userdata, "<<< %s cmd=%d, user=%p, buf=%p, flags=%#x, len=%d/%d, pts=%lld",
+            __func__, buf->cmd, buf->user_data, buf, buf->flags, buf->length, buf->alloc_size, (long long)buf->pts);
+#else
+    VLC_UNUSED(port);
+#endif
+
+    mmal_buffer_header_release(buf);  // Will extract & release pic in pool callback
+}
+
+
+int hw_mmal_subpic_update(vlc_object_t * const p_filter,
+    MMAL_BUFFER_HEADER_T * const sub_buf,
+    subpic_reg_stash_t * const spe,
+    const video_format_t * const fmt,
+    const MMAL_RECT_T * const scale_out,
+    const uint64_t pts)
+{
+    MMAL_STATUS_T err;
+
+    if (sub_buf == NULL)
+    {
+        if (spe->port->is_enabled && spe->seq != 0)
+        {
+            MMAL_BUFFER_HEADER_T *const buf = mmal_queue_wait(spe->pool->queue);
+
+            if (buf == NULL) {
+                msg_Err(p_filter, "Buffer get for subpic failed");
+                return -1;
+            }
+#if TRACE_ALL
+            msg_Dbg(p_filter, "Remove pic for sub %d", spe->seq);
+#endif
+            buf->cmd = 0;
+            buf->data = NULL;
+            buf->alloc_size = 0;
+            buf->offset = 0;
+            buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
+            buf->pts = pts;
+            buf->dts = MMAL_TIME_UNKNOWN;
+            buf->user_data = NULL;
+
+            if ((err = mmal_port_send_buffer(spe->port, buf)) != MMAL_SUCCESS)
+            {
+                msg_Err(p_filter, "Send buffer to subput failed");
+                mmal_buffer_header_release(buf);
+                return -1;
+            }
+
+            spe->seq = 0;
+        }
+    }
+    else
+    {
+        const unsigned int seq = hw_mmal_vzc_buf_seq(sub_buf);
+        bool needs_update = (spe->seq != seq);
+
+        hw_mmal_vzc_buf_scale_dest_rect(sub_buf, scale_out);
+
+        if (hw_mmal_vzc_buf_set_format(sub_buf, spe->port->format))
+        {
+            MMAL_DISPLAYREGION_T * const dreg = hw_mmal_vzc_buf_region(sub_buf);
+            MMAL_VIDEO_FORMAT_T *const v_fmt = &spe->port->format->es->video;
+
+            v_fmt->frame_rate.den = fmt->i_frame_rate_base;
+            v_fmt->frame_rate.num = fmt->i_frame_rate;
+            v_fmt->par.den        = fmt->i_sar_den;
+            v_fmt->par.num        = fmt->i_sar_num;
+            v_fmt->color_space = MMAL_COLOR_SPACE_UNKNOWN;
+
+            if (needs_update || dreg->alpha != spe->alpha || !cmp_rect(&dreg->dest_rect, &spe->dest_rect)) {
+
+                spe->alpha = dreg->alpha;
+                spe->dest_rect = dreg->dest_rect;
+                needs_update = true;
+
+                if (spe->display_id >= 0)
+                {
+                    dreg->display_num = spe->display_id;
+                    dreg->set |= MMAL_DISPLAY_SET_NUM;
+                }
+                dreg->layer = spe->layer;
+                dreg->set |= MMAL_DISPLAY_SET_LAYER;
+
+#if TRACE_ALL
+                msg_Dbg(p_filter, "%s: Update region: Set=%x, dest=%dx%d @ (%d,%d), src=%dx%d @ (%d,%d), layer=%d, alpha=%#x",
+                        __func__, dreg->set,
+                        dreg->dest_rect.width, dreg->dest_rect.height, dreg->dest_rect.x, dreg->dest_rect.y,
+                        dreg->src_rect.width, dreg->src_rect.height, dreg->src_rect.x, dreg->src_rect.y,
+                        dreg->layer, dreg->alpha);
+#endif
+
+                if ((err = mmal_port_parameter_set(spe->port, &dreg->hdr)) != MMAL_SUCCESS)
+                {
+                    msg_Err(p_filter, "Set display region on subput failed");
+                    return -1;
+                }
+
+                if ((err = mmal_port_format_commit(spe->port)) != MMAL_SUCCESS)
+                {
+                    msg_Dbg(p_filter, "%s: Subpic commit fail: %d", __func__, err);
+                    return -1;
+                }
+            }
+        }
+
+        if (!spe->port->is_enabled)
+        {
+            spe->port->buffer_num = 30;
+            spe->port->buffer_size = spe->port->buffer_size_recommended;  // Not used but shuts up the error checking
+
+            if ((err = mmal_port_enable(spe->port, conv_subpic_cb)) != MMAL_SUCCESS)
+            {
+                msg_Dbg(p_filter, "%s: Subpic enable fail: %d", __func__, err);
+                return -1;
+            }
+        }
+
+        if (needs_update)
+        {
+#if TRACE_ALL
+            msg_Dbg(p_filter, "Update pic for sub %d", spe->seq);
+#endif
+            if ((err = port_send_replicated(spe->port, spe->pool, sub_buf, pts)) != MMAL_SUCCESS)
+            {
+                msg_Err(p_filter, "Send buffer to subput failed");
+                return -1;
+            }
+
+            spe->seq = seq;
+        }
+    }
+    return 1;
+}
+
+
+
--- /dev/null
+++ b/modules/hw/mmal/subpic.h
@@ -0,0 +1,32 @@
+#ifndef VLC_HW_MMAL_SUBPIC_H_
+#define VLC_HW_MMAL_SUBPIC_H_
+
+typedef struct subpic_reg_stash_s
+{
+    MMAL_PORT_T * port;
+    MMAL_POOL_T * pool;
+    int display_id;  // -1 => do not set
+    unsigned int layer;
+    // Shadow  vars so we can tell if stuff has changed
+    MMAL_RECT_T dest_rect;
+    unsigned int alpha;
+    unsigned int seq;
+} subpic_reg_stash_t;
+
+int hw_mmal_subpic_update(vlc_object_t * const p_filter,
+                          MMAL_BUFFER_HEADER_T * const sub_buf,
+                          subpic_reg_stash_t * const spe,
+                          const video_format_t * const fmt,
+                          const MMAL_RECT_T * const scale_out,
+                          const uint64_t pts);
+
+void hw_mmal_subpic_flush(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe);
+
+void hw_mmal_subpic_close(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe);
+
+// If display id is -1 it will be unset
+MMAL_STATUS_T hw_mmal_subpic_open(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe, MMAL_PORT_T * const port,
+                                  const int display_id, const unsigned int layer);
+
+#endif
+
--- /dev/null
+++ b/modules/hw/mmal/v7_pmu.S
@@ -0,0 +1,263 @@
+/*------------------------------------------------------------
+Performance Monitor Block
+------------------------------------------------------------*/
+    .arm  @ Make sure we are in ARM mode.
+    .text
+    .align 2
+    .global getPMN @ export this function for the linker
+
+/*  Returns the number of progammable counters uint32_t getPMN(void) */
+
+getPMN:
+  MRC     p15, 0, r0, c9, c12, 0 /* Read PMNC Register	*/
+  MOV     r0, r0, LSR #11        /* Shift N field down to bit 0	*/
+  AND     r0, r0, #0x1F          /* Mask to leave just the 5 N bits	*/
+  BX      lr
+
+
+
+    .global pmn_config @ export this function for the linker
+  /* Sets the event for a programmable counter to record	*/
+  /* void pmn_config(unsigned counter, uint32_t event)	*/
+  /* counter = r0 = Which counter to program  (e.g. 0 for PMN0, 1 for PMN1 */
+  /* event   = r1 = The event code	*/
+pmn_config:
+  AND     r0, r0, #0x1F          /* Mask to leave only bits 4:0	*/
+  MCR     p15, 0, r0, c9, c12, 5 /* Write PMNXSEL Register	*/
+  MCR     p15, 0, r1, c9, c13, 1 /* Write EVTSELx Register	*/
+  BX      lr
+
+
+
+    .global ccnt_divider @ export this function for the linker
+  /* Enables/disables the divider (1/64) on CCNT	*/
+  /* void ccnt_divider(int divider)	*/
+  /* divider = r0 = If 0 disable divider, else enable dvider	*/
+ccnt_divider:
+  MRC     p15, 0, r1, c9, c12, 0  /* Read PMNC	*/
+
+  CMP     r0, #0x0                /* IF (r0 == 0)	*/
+  BICEQ   r1, r1, #0x08           /* THEN: Clear the D bit (disables the */
+  ORRNE   r1, r1, #0x08           /* ELSE: Set the D bit (enables the di */
+
+  MCR     p15, 0, r1, c9, c12, 0  /* Write PMNC	*/
+  BX      lr
+
+
+  /* ---------------------------------------------------------------	*/
+  /* Enable/Disable	*/
+  /* ---------------------------------------------------------------	*/
+
+    .global enable_pmu @ export this function for the linker
+  /* Global PMU enable	*/
+  /* void enable_pmu(void)	*/
+enable_pmu:
+  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
+  ORR     r0, r0, #0x01           /* Set E bit	*/
+  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
+  BX      lr
+
+
+
+    .global disable_pmu @ export this function for the linker
+  /* Global PMU disable	*/
+  /* void disable_pmu(void)	*/
+disable_pmu:
+  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
+  BIC     r0, r0, #0x01           /* Clear E bit	*/
+  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
+  BX      lr
+
+
+
+    .global enable_ccnt @ export this function for the linker
+  /* Enable the CCNT	*/
+  /* void enable_ccnt(void)	*/
+enable_ccnt:
+  MOV     r0, #0x80000000         /* Set C bit	*/
+  MCR     p15, 0, r0, c9, c12, 1  /* Write CNTENS Register	*/
+  BX      lr
+
+
+
+    .global disable_ccnt @ export this function for the linker
+  /* Disable the CCNT	*/
+  /* void disable_ccnt(void)	*/
+disable_ccnt:
+  MOV     r0, #0x80000000         /* Clear C bit	*/
+  MCR     p15, 0, r0, c9, c12, 2  /* Write CNTENC Register	*/
+  BX      lr
+
+
+
+    .global enable_pmn @ export this function for the linker
+  /* Enable PMN{n}	*/
+  /* void enable_pmn(uint32_t counter)	*/
+  /* counter = r0 = The counter to enable (e.g. 0 for PMN0, 1 for PMN1)
+enable_pmn: */
+  MOV     r1, #0x1                /* Use arg (r0) to set which counter t */
+  MOV     r1, r1, LSL r0
+
+  MCR     p15, 0, r1, c9, c12, 1  /* Write CNTENS Register	*/
+  BX      lr
+
+
+
+    .global disable_pmn @ export this function for the linker
+  /* Enable PMN{n}	*/
+  /* void disable_pmn(uint32_t counter)	*/
+  /* counter = r0 = The counter to enable (e.g. 0 for PMN0, 1 for PMN1)
+disable_pmn: */
+  MOV     r1, #0x1                /* Use arg (r0) to set which counter t */
+  MOV     r1, r1, LSL r0
+
+  MCR     p15, 0, r1, c9, c12, 1  /* Write CNTENS Register	*/
+  BX      lr
+
+
+
+    .global enable_pmu_user_access @ export this function for the linker
+  /* Enables User mode access to the PMU (must be called in a priviledge */
+  /* void enable_pmu_user_access(void)	*/
+enable_pmu_user_access:
+  MRC     p15, 0, r0, c9, c14, 0  /* Read PMUSERENR Register	*/
+  ORR     r0, r0, #0x01           /* Set EN bit (bit 0)	*/
+  MCR     p15, 0, r0, c9, c14, 0  /* Write PMUSERENR Register	*/
+  BX      lr
+
+
+
+    .global disable_pmu_user_access @ export this function for the linke
+  /* Disables User mode access to the PMU (must be called in a priviledg */
+  /* void disable_pmu_user_access(void)	*/
+disable_pmu_user_access:
+  MRC     p15, 0, r0, c9, c14, 0  /* Read PMUSERENR Register	*/
+  BIC     r0, r0, #0x01           /* Clear EN bit (bit 0)	*/
+  MCR     p15, 0, r0, c9, c14, 0  /* Write PMUSERENR Register	*/
+  BX      lr
+
+
+  /* ---------------------------------------------------------------	*/
+  /* Counter read registers	*/
+  /* ---------------------------------------------------------------	*/
+
+    .global read_ccnt @ export this function for the linker
+  /* Returns the value of CCNT	*/
+  /* uint32_t read_ccnt(void)	*/
+read_ccnt:
+  MRC     p15, 0, r0, c9, c13, 0 /* Read CCNT Register	*/
+  BX      lr
+
+
+    .global read_pmn @ export this function for the linker
+  /* Returns the value of PMN{n}	*/
+  /* uint32_t read_pmn(uint32_t counter)	*/
+  /* counter = r0 =  The counter to read (e.g. 0 for PMN0, 1 for PMN1)	*
+read_pmn: */
+  AND     r0, r0, #0x1F          /* Mask to leave only bits 4:0	*/
+  MCR     p15, 0, r0, c9, c12, 5 /* Write PMNXSEL Register	*/
+  MRC     p15, 0, r0, c9, c13, 2 /* Read current PMNx Register	*/
+  BX      lr
+
+
+  /* ---------------------------------------------------------------	*/
+  /* Software Increment	*/
+  /* ---------------------------------------------------------------	*/
+
+    .global pmu_software_increment @ export this function for the linker
+	/* Writes to software increment register	*/
+	/* void pmu_software_increment(uint32_t counter)	*/
+	/* counter = r0 =  The counter to increment (e.g. 0 for PMN0, 1 for PMN
+pmu_software_increment: */
+  MOV     r1, #0x01
+  MOV			r1, r1, LSL r0
+  MCR     p15, 0, r1, c9, c12, 4 /* Write SWINCR Register	*/
+  BX      lr
+
+  /* ---------------------------------------------------------------	*/
+  /* Overflow & Interrupt Generation	*/
+  /* ---------------------------------------------------------------	*/
+
+    .global read_flags @ export this function for the linker
+  /* Returns the value of the overflow flags	*/
+  /* uint32_t read_flags(void)	*/
+read_flags:
+  MRC     p15, 0, r0, c9, c12, 3 /* Read FLAG Register	*/
+  BX      lr
+
+
+    .global write_flags @ export this function for the linker
+  /* Writes the overflow flags	*/
+  /* void write_flags(uint32_t flags)	*/
+write_flags:
+  MCR     p15, 0, r0, c9, c12, 3 /* Write FLAG Register	*/
+  BX      lr
+
+
+    .global enable_ccnt_irq @ export this function for the linker
+  /* Enables interrupt generation on overflow of the CCNT	*/
+  /* void enable_ccnt_irq(void)	*/
+enable_ccnt_irq:
+  MOV     r0, #0x80000000
+  MCR     p15, 0, r0, c9, c14, 1  /* Write INTENS Register	*/
+  BX      lr
+
+    .global disable_ccnt_irq @ export this function for the linker
+  /* Disables interrupt generation on overflow of the CCNT	*/
+  /* void disable_ccnt_irq(void)	*/
+disable_ccnt_irq:
+  MOV     r0, #0x80000000
+  MCR     p15, 0, r0, c9, c14, 2   /* Write INTENC Register	*/
+  BX      lr
+
+
+    .global enable_pmn_irq @ export this function for the linker
+  /* Enables interrupt generation on overflow of PMN{x}	*/
+  /* void enable_pmn_irq(uint32_t counter)	*/
+  /* counter = r0 =  The counter to enable the interrupt for (e.g. 0 for
+enable_pmn_irq: */
+  MOV     r1, #0x1                 /* Use arg (r0) to set which counter */
+  MOV     r0, r1, LSL r0
+  MCR     p15, 0, r0, c9, c14, 1   /* Write INTENS Register	*/
+  BX      lr
+
+    .global disable_pmn_irq @ export this function for the linker
+  /* Disables interrupt generation on overflow of PMN{x}	*/
+  /* void disable_pmn_irq(uint32_t counter)	*/
+  /* counter = r0 =  The counter to disable the interrupt for (e.g. 0 fo
+disable_pmn_irq: */
+  MOV     r1, #0x1                /* Use arg (r0) to set which counter t */
+  MOV     r0, r1, LSL r0
+  MCR     p15, 0, r0, c9, c14, 2  /* Write INTENC Register	*/
+  BX      lr
+
+  /* ---------------------------------------------------------------	*/
+  /* Reset Functions	*/
+  /* ---------------------------------------------------------------	*/
+
+    .global reset_pmn @ export this function for the linker
+  /* Resets the programmable counters	*/
+  /* void reset_pmn(void)	*/
+reset_pmn:
+  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
+  ORR     r0, r0, #0x02           /* Set P bit (Event Counter Reset)	*/
+  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
+  BX      lr
+
+
+	.global reset_ccnt @ export this function for the linker
+  /* Resets the CCNT	*/
+  /* void reset_ccnt(void)	*/
+reset_ccnt:
+  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
+  ORR     r0, r0, #0x04           /* Set C bit (Event Counter Reset)	*/
+  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
+  BX      lr
+
+
+    .end @end of code, this line is optional.
+/* ------------------------------------------------------------	*/
+/* End of v7_pmu.s	*/
+/* ------------------------------------------------------------	*/
+
+
--- /dev/null
+++ b/modules/hw/mmal/v7_pmu.h
@@ -0,0 +1,113 @@
+// ------------------------------------------------------------
+// PMU for Cortex-A/R (v7-A/R)
+// ------------------------------------------------------------
+
+#ifndef _V7_PMU_H
+#define _V7_PMU_H
+
+// Returns the number of progammable counters
+unsigned int getPMN(void);
+
+// Sets the event for a programmable counter to record
+// counter = r0 = Which counter to program  (e.g. 0 for PMN0, 1 for PMN1)
+// event   = r1 = The event code (from appropiate TRM or ARM Architecture Reference Manual)
+void pmn_config(unsigned int counter, unsigned int event);
+
+// Enables/disables the divider (1/64) on CCNT
+// divider = r0 = If 0 disable divider, else enable dvider
+void ccnt_divider(int divider);
+
+//
+// Enables and disables
+//
+
+// Global PMU enable
+// On ARM11 this enables the PMU, and the counters start immediately
+// On Cortex this enables the PMU, there are individual enables for the counters
+void enable_pmu(void);
+
+// Global PMU disable
+// On Cortex, this overrides the enable state of the individual counters
+void disable_pmu(void);
+
+// Enable the CCNT
+void enable_ccnt(void);
+
+// Disable the CCNT
+void disable_ccnt(void);
+
+// Enable PMN{n}
+// counter = The counter to enable (e.g. 0 for PMN0, 1 for PMN1)
+void enable_pmn(unsigned int counter);
+
+// Enable PMN{n}
+// counter = The counter to enable (e.g. 0 for PMN0, 1 for PMN1)
+void disable_pmn(unsigned int counter);
+
+//
+// Read counter values
+//
+
+// Returns the value of CCNT
+unsigned int read_ccnt(void);
+
+// Returns the value of PMN{n}
+// counter = The counter to read (e.g. 0 for PMN0, 1 for PMN1)
+unsigned int read_pmn(unsigned int counter);
+
+//
+// Overflow and interrupts
+//
+
+// Returns the value of the overflow flags
+unsigned int read_flags(void);
+
+// Writes the overflow flags
+void write_flags(unsigned int flags);
+
+// Enables interrupt generation on overflow of the CCNT
+void enable_ccnt_irq(void);
+
+// Disables interrupt generation on overflow of the CCNT
+void disable_ccnt_irq(void);
+
+// Enables interrupt generation on overflow of PMN{x}
+// counter = The counter to enable the interrupt for (e.g. 0 for PMN0, 1 for PMN1)
+void enable_pmn_irq(unsigned int counter);
+
+// Disables interrupt generation on overflow of PMN{x}
+// counter = r0 =  The counter to disable the interrupt for (e.g. 0 for PMN0, 1 for PMN1)
+void disable_pmn_irq(unsigned int counter);
+
+//
+// Counter reset functions
+//
+
+// Resets the programmable counters
+void reset_pmn(void);
+
+// Resets the CCNT
+void reset_ccnt(void);
+
+//
+// Software Increment
+
+// Writes to software increment register
+// counter = The counter to increment (e.g. 0 for PMN0, 1 for PMN1)
+void pmu_software_increment(unsigned int counter);
+
+//
+// User mode access
+//
+
+// Enables User mode access to the PMU (must be called in a priviledged mode)
+void enable_pmu_user_access(void);
+
+// Disables User mode access to the PMU (must be called in a priviledged mode)
+void disable_pmu_user_access(void);
+
+#endif
+// ------------------------------------------------------------
+// End of v7_pmu.h
+// ------------------------------------------------------------
+
--- a/modules/hw/mmal/vout.c
+++ b/modules/hw/mmal/vout.c
@@ -27,21 +27,27 @@
 #endif
 
 #include <math.h>
+#include <stdatomic.h>
 
 #include <vlc_common.h>
-#include <vlc_atomic.h>
 #include <vlc_plugin.h>
 #include <vlc_threads.h>
 #include <vlc_vout_display.h>
+#include <vlc_modules.h>
 
-#include "mmal_picture.h"
-
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wbad-function-cast"
 #include <bcm_host.h>
+#pragma GCC diagnostic pop
 #include <interface/mmal/mmal.h>
 #include <interface/mmal/util/mmal_util.h>
 #include <interface/mmal/util/mmal_default_components.h>
 #include <interface/vmcs_host/vc_tvservice.h>
-#include <interface/vmcs_host/vc_dispmanx.h>
+
+#include "mmal_picture.h"
+#include "subpic.h"
+
+#define TRACE_ALL 0
 
 #define MAX_BUFFERS_IN_TRANSIT 1
 #define VC_TV_MAX_MODE_IDS 127
@@ -50,10 +56,12 @@
 #define MMAL_LAYER_TEXT N_("VideoCore layer where the video is displayed.")
 #define MMAL_LAYER_LONGTEXT N_("VideoCore layer where the video is displayed. Subpictures are displayed directly above and a black background directly below.")
 
-#define MMAL_BLANK_BACKGROUND_NAME "mmal-blank-background"
-#define MMAL_BLANK_BACKGROUND_TEXT N_("Blank screen below video.")
-#define MMAL_BLANK_BACKGROUND_LONGTEXT N_("Render blank screen below video. " \
-        "Increases VideoCore load.")
+#define MMAL_DISPLAY_NAME "mmal-display"
+#define MMAL_DISPLAY_TEXT N_("Output device for Rpi fullscreen.")
+#define MMAL_DISPLAY_LONGTEXT N_("Output device for Rpi fullscreen. " \
+"Valid values are HDMI-1,HDMI-2.  By default if qt-fullscreen-screennumber " \
+"is specified (or set by Fullscreen Output Device in Preferences) " \
+"HDMI-<qt-fullscreen-screennumber+1> will be used, otherwise HDMI-1.")
 
 #define MMAL_ADJUST_REFRESHRATE_NAME "mmal-adjust-refreshrate"
 #define MMAL_ADJUST_REFRESHRATE_TEXT N_("Adjust HDMI refresh rate to the video.")
@@ -68,64 +76,33 @@
 #define PHASE_OFFSET_TARGET ((double)0.25)
 #define PHASE_CHECK_INTERVAL 100
 
-static int Open(vlc_object_t *);
-static void Close(vlc_object_t *);
-
-vlc_module_begin()
-    set_shortname(N_("MMAL vout"))
-    set_description(N_("MMAL-based vout plugin for Raspberry Pi"))
-    set_capability("vout display", 90)
-    add_shortcut("mmal_vout")
-    add_integer(MMAL_LAYER_NAME, 1, MMAL_LAYER_TEXT, MMAL_LAYER_LONGTEXT, false)
-    add_bool(MMAL_BLANK_BACKGROUND_NAME, true, MMAL_BLANK_BACKGROUND_TEXT,
-                    MMAL_BLANK_BACKGROUND_LONGTEXT, true);
-    add_bool(MMAL_ADJUST_REFRESHRATE_NAME, false, MMAL_ADJUST_REFRESHRATE_TEXT,
-                    MMAL_ADJUST_REFRESHRATE_LONGTEXT, false)
-    add_bool(MMAL_NATIVE_INTERLACED, false, MMAL_NATIVE_INTERLACE_TEXT,
-                    MMAL_NATIVE_INTERLACE_LONGTEXT, false)
-    set_callbacks(Open, Close)
-vlc_module_end()
+#define SUBS_MAX 4
 
-struct dmx_region_t {
-    struct dmx_region_t *next;
-    picture_t *picture;
-    VC_RECT_T bmp_rect;
-    VC_RECT_T src_rect;
-    VC_RECT_T dst_rect;
-    VC_DISPMANX_ALPHA_T alpha;
-    DISPMANX_ELEMENT_HANDLE_T element;
-    DISPMANX_RESOURCE_HANDLE_T resource;
-    int32_t pos_x;
-    int32_t pos_y;
-};
+typedef struct vout_subpic_s {
+    MMAL_COMPONENT_T *component;
+    subpic_reg_stash_t sub;
+} vout_subpic_t;
 
 struct vout_display_sys_t {
-    vlc_cond_t buffer_cond;
-    vlc_mutex_t buffer_mutex;
     vlc_mutex_t manage_mutex;
 
-    plane_t planes[3]; /* Depending on video format up to 3 planes are used */
-    picture_t **pictures; /* Actual list of alloced pictures passed into picture_pool */
-    picture_pool_t *picture_pool;
-
+    vcsm_init_type_t init_type;
     MMAL_COMPONENT_T *component;
     MMAL_PORT_T *input;
     MMAL_POOL_T *pool; /* mmal buffer headers, used for pushing pictures to component*/
-    struct dmx_region_t *dmx_region;
     int i_planes; /* Number of actually used planes, 1 for opaque, 3 for i420 */
 
-    uint32_t buffer_size; /* size of actual mmal buffers */
     int buffers_in_transit; /* number of buffers currently pushed to mmal component */
     unsigned num_buffers; /* number of buffers allocated at mmal port */
 
-    DISPMANX_DISPLAY_HANDLE_T dmx_handle;
-    DISPMANX_ELEMENT_HANDLE_T bkg_element;
-    DISPMANX_RESOURCE_HANDLE_T bkg_resource;
+    int display_id;
     unsigned display_width;
     unsigned display_height;
 
-    int i_frame_rate_base; /* cached framerate to detect changes for rate adjustment */
-    int i_frame_rate;
+    MMAL_RECT_T dest_rect;      // Output rectangle in display coords
+
+    unsigned int i_frame_rate_base; /* cached framerate to detect changes for rate adjustment */
+    unsigned int i_frame_rate;
 
     int next_phase_check; /* lowpass for phase check frequency */
     int phase_offset; /* currently applied offset to presentation time in ns */
@@ -136,264 +113,485 @@
     bool native_interlaced;
     bool b_top_field_first; /* cached interlaced settings to detect changes for native mode */
     bool b_progressive;
-    bool opaque; /* indicated use of opaque picture format (zerocopy) */
-};
+    bool force_config;
 
-static const vlc_fourcc_t subpicture_chromas[] = {
-    VLC_CODEC_RGBA,
-    0
-};
+    vout_subpic_t subs[SUBS_MAX];
+    // Stash for subpics derived from the passed subpicture rather than
+    // included with the main pic
+    MMAL_BUFFER_HEADER_T * subpic_bufs[SUBS_MAX];
+
+    picture_pool_t * pic_pool;
+
+    struct vout_isp_conf_s {
+        MMAL_COMPONENT_T *component;
+        MMAL_PORT_T * input;
+        MMAL_PORT_T * output;
+        MMAL_QUEUE_T * out_q;
+        MMAL_POOL_T * in_pool;
+        MMAL_POOL_T * out_pool;
+        bool pending;
+    } isp;
 
-/* Utility functions */
-static inline uint32_t align(uint32_t x, uint32_t y);
-static int configure_display(vout_display_t *vd, const vout_display_cfg_t *cfg,
-                const video_format_t *fmt);
+    MMAL_POOL_T * copy_pool;
+    MMAL_BUFFER_HEADER_T * copy_buf;
 
-/* VLC vout display callbacks */
-static picture_pool_t *vd_pool(vout_display_t *vd, unsigned count);
-static void vd_prepare(vout_display_t *vd, picture_t *picture,
-                subpicture_t *subpicture);
-static void vd_display(vout_display_t *vd, picture_t *picture,
-                subpicture_t *subpicture);
-static int vd_control(vout_display_t *vd, int query, va_list args);
-static void vd_manage(vout_display_t *vd);
-
-/* MMAL callbacks */
-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+    // Subpic blend if we have to do it here
+    vzc_pool_ctl_t * vzc;
+};
 
-/* TV service */
-static int query_resolution(vout_display_t *vd, unsigned *width, unsigned *height);
-static void tvservice_cb(void *callback_data, uint32_t reason, uint32_t param1,
-                uint32_t param2);
-static void adjust_refresh_rate(vout_display_t *vd, const video_format_t *fmt);
-static int set_latency_target(vout_display_t *vd, bool enable);
 
-/* DispManX */
-static void display_subpicture(vout_display_t *vd, subpicture_t *subpicture);
-static void close_dmx(vout_display_t *vd);
-static struct dmx_region_t *dmx_region_new(vout_display_t *vd,
-                DISPMANX_UPDATE_HANDLE_T update, subpicture_region_t *region);
-static void dmx_region_update(struct dmx_region_t *dmx_region,
-                DISPMANX_UPDATE_HANDLE_T update, picture_t *picture);
-static void dmx_region_delete(struct dmx_region_t *dmx_region,
-                DISPMANX_UPDATE_HANDLE_T update);
-static void show_background(vout_display_t *vd, bool enable);
-static void maintain_phase_sync(vout_display_t *vd);
+// ISP setup
 
-static int Open(vlc_object_t *object)
+static inline bool want_isp(const vout_display_t * const vd)
 {
-    vout_display_t *vd = (vout_display_t *)object;
-    vout_display_sys_t *sys;
-    uint32_t buffer_pitch, buffer_height;
-    vout_display_place_t place;
-    MMAL_DISPLAYREGION_T display_region;
-    MMAL_STATUS_T status;
-    int ret = VLC_SUCCESS;
-    unsigned i;
+    return (vd->fmt.i_chroma == VLC_CODEC_MMAL_ZC_SAND10);
+}
 
-    if (vout_display_IsWindowed(vd))
-        return VLC_EGENERIC;
+static inline bool want_copy(const vout_display_t * const vd)
+{
+    return (vd->fmt.i_chroma == VLC_CODEC_I420 || vd->fmt.i_chroma == VLC_CODEC_I420_10L);
+}
 
-    sys = calloc(1, sizeof(struct vout_display_sys_t));
-    if (!sys)
-        return VLC_ENOMEM;
-    vd->sys = sys;
+static inline vlc_fourcc_t req_chroma(const vout_display_t * const vd)
+{
+    return !hw_mmal_chroma_is_mmal(vd->fmt.i_chroma) && !want_copy(vd) ?
+        VLC_CODEC_I420 :
+        vd->fmt.i_chroma;
+}
 
-    sys->layer = var_InheritInteger(vd, MMAL_LAYER_NAME);
-    bcm_host_init();
+static MMAL_FOURCC_T vout_vlc_to_mmal_pic_fourcc(const unsigned int fcc)
+{
+    switch (fcc){
+    case VLC_CODEC_MMAL_OPAQUE:
+        return MMAL_ENCODING_OPAQUE;
+    case VLC_CODEC_MMAL_ZC_SAND8:
+        return MMAL_ENCODING_YUVUV128;
+    case VLC_CODEC_MMAL_ZC_SAND10:
+        return MMAL_ENCODING_YUVUV64_10;
+    case VLC_CODEC_MMAL_ZC_SAND30:
+        return MMAL_ENCODING_YUV10_COL;
+    case VLC_CODEC_MMAL_ZC_I420:
+    case VLC_CODEC_I420:
+        return MMAL_ENCODING_I420;
+    default:
+        break;
+    }
+    return MMAL_ENCODING_I420;
+}
 
-    sys->opaque = vd->fmt.i_chroma == VLC_CODEC_MMAL_OPAQUE;
+static void display_set_format(const vout_display_t * const vd, MMAL_ES_FORMAT_T *const es_fmt, const bool is_intermediate)
+{
+    const unsigned int w = is_intermediate ? vd->fmt.i_visible_width  : vd->fmt.i_width ;
+    const unsigned int h = is_intermediate ? vd->fmt.i_visible_height : vd->fmt.i_height;
+    MMAL_VIDEO_FORMAT_T * const v_fmt = &es_fmt->es->video;
 
-    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &sys->component);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(vd, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
-                        MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+    es_fmt->type = MMAL_ES_TYPE_VIDEO;
+    es_fmt->encoding = is_intermediate ? MMAL_ENCODING_I420 : vout_vlc_to_mmal_pic_fourcc(vd->fmt.i_chroma);
+    es_fmt->encoding_variant = 0;
+
+    v_fmt->width  = (w + 31) & ~31;
+    v_fmt->height = (h + 15) & ~15;
+    v_fmt->crop.x = 0;
+    v_fmt->crop.y = 0;
+    v_fmt->crop.width = w;
+    v_fmt->crop.height = h;
+    if (vd->fmt.i_sar_num == 0 || vd->fmt.i_sar_den == 0) {
+        v_fmt->par.num        = 1;
+        v_fmt->par.den        = 1;
+    } else {
+        v_fmt->par.num        = vd->fmt.i_sar_num;
+        v_fmt->par.den        = vd->fmt.i_sar_den;
     }
+    v_fmt->frame_rate.num = vd->fmt.i_frame_rate;
+    v_fmt->frame_rate.den = vd->fmt.i_frame_rate_base;
+    v_fmt->color_space    = vlc_to_mmal_color_space(vd->fmt.space);
 
-    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
-    status = mmal_port_enable(sys->component->control, control_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(vd, "Failed to enable control port %s (status=%"PRIx32" %s)",
-                        sys->component->control->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+    msg_Dbg(vd, "WxH: %dx%d, Crop: %dx%d", v_fmt->width, v_fmt->height, v_fmt->crop.width, v_fmt->crop.height);
+}
+
+static void display_src_rect(const vout_display_t * const vd, MMAL_RECT_T *const rect)
+{
+    const bool wants_isp = want_isp(vd);
+    rect->x = wants_isp ? 0 : vd->fmt.i_x_offset;
+    rect->y = wants_isp ? 0 : vd->fmt.i_y_offset;
+    rect->width = vd->fmt.i_visible_width;
+    rect->height = vd->fmt.i_visible_height;
+}
+
+static void isp_input_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+{
+#if TRACE_ALL
+    vout_display_t * const vd = (vout_display_t *)port->userdata;
+    pic_ctx_mmal_t * ctx = buf->user_data;
+    msg_Dbg(vd, "<<< %s: cmd=%d, ctx=%p, buf=%p, flags=%#x, pts=%lld", __func__, buf->cmd, ctx, buf,
+            buf->flags, (long long)buf->pts);
+#else
+    VLC_UNUSED(port);
+#endif
+
+    mmal_buffer_header_release(buf);
+
+#if TRACE_ALL
+    msg_Dbg(vd, ">>> %s", __func__);
+#endif
+}
+
+static void isp_control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    vout_display_t *vd = (vout_display_t *)port->userdata;
+    MMAL_STATUS_T status;
+
+    if (buffer->cmd == MMAL_EVENT_ERROR) {
+        status = *(uint32_t *)buffer->data;
+        msg_Err(vd, "MMAL error %"PRIx32" \"%s\"", status, mmal_status_to_string(status));
     }
 
-    sys->input = sys->component->input[0];
-    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
+    mmal_buffer_header_release(buffer);
+}
 
-    if (sys->opaque) {
-        sys->input->format->encoding = MMAL_ENCODING_OPAQUE;
-        sys->i_planes = 1;
-        sys->buffer_size = sys->input->buffer_size_recommended;
-    } else {
-        sys->input->format->encoding = MMAL_ENCODING_I420;
-        vd->fmt.i_chroma = VLC_CODEC_I420;
-        buffer_pitch = align(vd->fmt.i_width, 32);
-        buffer_height = align(vd->fmt.i_height, 16);
-        sys->i_planes = 3;
-        sys->buffer_size = 3 * buffer_pitch * buffer_height / 2;
-    }
-
-    sys->input->format->es->video.width = vd->fmt.i_width;
-    sys->input->format->es->video.height = vd->fmt.i_height;
-    sys->input->format->es->video.crop.x = 0;
-    sys->input->format->es->video.crop.y = 0;
-    sys->input->format->es->video.crop.width = vd->fmt.i_width;
-    sys->input->format->es->video.crop.height = vd->fmt.i_height;
-    sys->input->format->es->video.par.num = vd->source.i_sar_num;
-    sys->input->format->es->video.par.den = vd->source.i_sar_den;
+static void isp_output_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+{
+    if (buf->cmd == 0 && buf->length != 0)
+    {
+        // The filter structure etc. should always exist if we have contents
+        // but might not on later flushes as we shut down
+        vout_display_t * const vd = (vout_display_t *)port->userdata;
+        struct vout_isp_conf_s *const isp = &vd->sys->isp;
 
-    status = mmal_port_format_commit(sys->input);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(vd, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
-                        sys->input->name, status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+#if TRACE_ALL
+        msg_Dbg(vd, "<<< %s: cmd=%d; flags=%#x, pts=%lld", __func__, buf->cmd, buf->flags, (long long) buf->pts);
+#endif
+        mmal_queue_put(isp->out_q, buf);
+#if TRACE_ALL
+        msg_Dbg(vd, ">>> %s: out Q len=%d", __func__, mmal_queue_length(isp->out_q));
+#endif
     }
-    sys->input->buffer_size = sys->input->buffer_size_recommended;
+    else
+    {
+        mmal_buffer_header_reset(buf);
+        mmal_buffer_header_release(buf);
+    }
+}
 
-    vout_display_PlacePicture(&place, &vd->source, vd->cfg, false);
-    display_region.hdr.id = MMAL_PARAMETER_DISPLAYREGION;
-    display_region.hdr.size = sizeof(MMAL_DISPLAYREGION_T);
-    display_region.fullscreen = MMAL_FALSE;
-    display_region.src_rect.x = vd->fmt.i_x_offset;
-    display_region.src_rect.y = vd->fmt.i_y_offset;
-    display_region.src_rect.width = vd->fmt.i_visible_width;
-    display_region.src_rect.height = vd->fmt.i_visible_height;
-    display_region.dest_rect.x = place.x;
-    display_region.dest_rect.y = place.y;
-    display_region.dest_rect.width = place.width;
-    display_region.dest_rect.height = place.height;
-    display_region.layer = sys->layer;
-    display_region.set = MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_SRC_RECT |
-            MMAL_DISPLAY_SET_DEST_RECT | MMAL_DISPLAY_SET_LAYER;
-    status = mmal_port_parameter_set(sys->input, &display_region.hdr);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(vd, "Failed to set display region (status=%"PRIx32" %s)",
-                        status, mmal_status_to_string(status));
-        ret = VLC_EGENERIC;
-        goto out;
+static void isp_empty_out_q(struct vout_isp_conf_s * const isp)
+{
+    MMAL_BUFFER_HEADER_T * buf;
+    // We can be called as part of error recovery so allow for missing Q
+    if (isp->out_q == NULL)
+        return;
+
+    while ((buf = mmal_queue_get(isp->out_q)) != NULL)
+        mmal_buffer_header_release(buf);
+}
+
+static void isp_flush(struct vout_isp_conf_s * const isp)
+{
+    if (!isp->input->is_enabled)
+        mmal_port_disable(isp->input);
+
+    if (isp->output->is_enabled)
+        mmal_port_disable(isp->output);
+
+    isp_empty_out_q(isp);
+    isp->pending = false;
+}
+
+static MMAL_STATUS_T isp_prepare(vout_display_t * const vd, struct vout_isp_conf_s * const isp)
+{
+    MMAL_STATUS_T err;
+    MMAL_BUFFER_HEADER_T * buf;
+
+    if (!isp->output->is_enabled) {
+        if ((err = mmal_port_enable(isp->output, isp_output_cb)) != MMAL_SUCCESS)
+        {
+            msg_Err(vd, "ISP output port enable failed");
+            return err;
+        }
     }
 
-    for (i = 0; i < sys->i_planes; ++i) {
-        sys->planes[i].i_lines = buffer_height;
-        sys->planes[i].i_pitch = buffer_pitch;
-        sys->planes[i].i_visible_lines = vd->fmt.i_visible_height;
-        sys->planes[i].i_visible_pitch = vd->fmt.i_visible_width;
+    while ((buf = mmal_queue_get(isp->out_pool->queue)) != NULL) {
+        if ((err = mmal_port_send_buffer(isp->output, buf)) != MMAL_SUCCESS)
+        {
+            msg_Err(vd, "ISP output port stuff failed");
+            return err;
+        }
+    }
 
-        if (i > 0) {
-            sys->planes[i].i_lines /= 2;
-            sys->planes[i].i_pitch /= 2;
-            sys->planes[i].i_visible_lines /= 2;
-            sys->planes[i].i_visible_pitch /= 2;
+    if (!isp->input->is_enabled) {
+        if ((err = mmal_port_enable(isp->input, isp_input_cb)) != MMAL_SUCCESS)
+        {
+            msg_Err(vd, "ISP input port enable failed");
+            return err;
         }
     }
+    return MMAL_SUCCESS;
+}
 
-    vlc_mutex_init(&sys->buffer_mutex);
-    vlc_cond_init(&sys->buffer_cond);
-    vlc_mutex_init(&sys->manage_mutex);
+static void isp_close(vout_display_t * const vd, vout_display_sys_t * const vd_sys)
+{
+    struct vout_isp_conf_s * const isp = &vd_sys->isp;
+    VLC_UNUSED(vd);
 
-    vd->pool = vd_pool;
-    vd->prepare = vd_prepare;
-    vd->display = vd_display;
-    vd->control = vd_control;
-    vd->manage = vd_manage;
+    if (isp->component == NULL)
+        return;
 
-    vc_tv_register_callback(tvservice_cb, vd);
+    isp_flush(isp);
 
-    if (query_resolution(vd, &sys->display_width, &sys->display_height) >= 0) {
-        vout_display_SendEventDisplaySize(vd, sys->display_width, sys->display_height);
-    } else {
-        sys->display_width = vd->cfg->display.width;
-        sys->display_height = vd->cfg->display.height;
+    if (isp->component->control->is_enabled)
+        mmal_port_disable(isp->component->control);
+
+    if (isp->out_q != NULL) {
+        // 1st junk anything lying around
+        isp_empty_out_q(isp);
+
+        mmal_queue_destroy(isp->out_q);
+        isp->out_q = NULL;
     }
 
-    sys->dmx_handle = vc_dispmanx_display_open(0);
-    vd->info.subpicture_chromas = subpicture_chromas;
+    if (isp->out_pool != NULL) {
+        mmal_port_pool_destroy(isp->output, isp->out_pool);
+        isp->out_pool = NULL;
+    }
 
-    vout_display_DeleteWindow(vd, NULL);
+    isp->input = NULL;
+    isp->output = NULL;
 
-out:
-    if (ret != VLC_SUCCESS)
-        Close(object);
+    mmal_component_release(isp->component);
+    isp->component = NULL;
 
-    return ret;
+    return;
 }
 
-static void Close(vlc_object_t *object)
+// Restuff into output rather than return to pool is we can
+static MMAL_BOOL_T isp_out_pool_cb(MMAL_POOL_T *pool, MMAL_BUFFER_HEADER_T *buffer, void *userdata)
 {
-    vout_display_t *vd = (vout_display_t *)object;
-    vout_display_sys_t *sys = vd->sys;
-    char response[20]; /* answer is hvs_update_fields=%1d */
-    unsigned i;
+    struct vout_isp_conf_s * const isp = userdata;
+    VLC_UNUSED(pool);
+    if (isp->output->is_enabled) {
+        mmal_buffer_header_reset(buffer);
+        if (mmal_port_send_buffer(isp->output, buffer) == MMAL_SUCCESS)
+            return MMAL_FALSE;
+    }
+    return MMAL_TRUE;
+}
 
-    vc_tv_unregister_callback_full(tvservice_cb, vd);
+static MMAL_STATUS_T isp_setup(vout_display_t * const vd, vout_display_sys_t * const vd_sys)
+{
+    struct vout_isp_conf_s * const isp = &vd_sys->isp;
+    MMAL_STATUS_T err;
 
-    if (sys->dmx_handle)
-        close_dmx(vd);
+    if ((err = mmal_component_create(MMAL_COMPONENT_ISP_RESIZER, &isp->component)) != MMAL_SUCCESS) {
+        msg_Err(vd, "Cannot create ISP component");
+        return err;
+    }
+    isp->input = isp->component->input[0];
+    isp->output = isp->component->output[0];
 
-    if (sys->component && sys->component->control->is_enabled)
-        mmal_port_disable(sys->component->control);
+    isp->component->control->userdata = (void *)vd;
+    if ((err = mmal_port_enable(isp->component->control, isp_control_port_cb)) != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to enable ISP control port");
+        goto fail;
+    }
 
-    if (sys->input && sys->input->is_enabled)
-        mmal_port_disable(sys->input);
+    isp->input->userdata = (void *)vd;
+    display_set_format(vd, isp->input->format, false);
 
-    if (sys->component && sys->component->is_enabled)
-        mmal_component_disable(sys->component);
+    if ((err = port_parameter_set_bool(isp->input, MMAL_PARAMETER_ZERO_COPY, true)) != MMAL_SUCCESS)
+        goto fail;
 
-    if (sys->pool)
-        mmal_port_pool_destroy(sys->input, sys->pool);
+    if ((err = mmal_port_format_commit(isp->input)) != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to set ISP input format");
+        goto fail;
+    }
 
-    if (sys->component)
-        mmal_component_release(sys->component);
+    isp->input->buffer_size = isp->input->buffer_size_recommended;
+    isp->input->buffer_num = 30;
 
-    if (sys->picture_pool)
-        picture_pool_Release(sys->picture_pool);
-    else
-        for (i = 0; i < sys->num_buffers; ++i)
-            if (sys->pictures[i]) {
-                mmal_buffer_header_release(sys->pictures[i]->p_sys->buffer);
-                picture_Release(sys->pictures[i]);
-            }
+    if ((isp->in_pool = mmal_pool_create(isp->input->buffer_num, 0)) == NULL)
+    {
+        msg_Err(vd, "Failed to create input pool");
+        goto fail;
+    }
 
-    vlc_mutex_destroy(&sys->buffer_mutex);
-    vlc_cond_destroy(&sys->buffer_cond);
-    vlc_mutex_destroy(&sys->manage_mutex);
+    if ((isp->out_q = mmal_queue_create()) == NULL)
+    {
+        err = MMAL_ENOMEM;
+        goto fail;
+    }
 
-    if (sys->native_interlaced) {
-        if (vc_gencmd(response, sizeof(response), "hvs_update_fields 0") < 0 ||
-                response[18] != '0')
-            msg_Warn(vd, "Could not reset hvs field mode");
+    display_set_format(vd, isp->output->format, true);
+
+    if ((err = port_parameter_set_bool(isp->output, MMAL_PARAMETER_ZERO_COPY, true)) != MMAL_SUCCESS)
+        goto fail;
+
+    if ((err = mmal_port_format_commit(isp->output)) != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to set ISP input format");
+        goto fail;
     }
 
-    free(sys->pictures);
-    free(sys);
+    isp->output->buffer_size = isp->output->buffer_size_recommended;
+    isp->output->buffer_num = 2;
+    isp->output->userdata = (void *)vd;
+
+    if ((isp->out_pool = mmal_port_pool_create(isp->output, isp->output->buffer_num, isp->output->buffer_size)) == NULL)
+    {
+        msg_Err(vd, "Failed to make ISP port pool");
+        goto fail;
+    }
 
-    bcm_host_deinit();
+    mmal_pool_callback_set(isp->out_pool, isp_out_pool_cb, isp);
+
+    if ((err = isp_prepare(vd, isp)) != MMAL_SUCCESS)
+        goto fail;
+
+    return MMAL_SUCCESS;
+
+fail:
+    isp_close(vd, vd_sys);
+    return err;
 }
 
-static inline uint32_t align(uint32_t x, uint32_t y) {
-    uint32_t mod = x % y;
-    if (mod == 0)
-        return x;
+static MMAL_STATUS_T isp_check(vout_display_t * const vd, vout_display_sys_t * const vd_sys)
+{
+    struct vout_isp_conf_s *const isp = &vd_sys->isp;
+    const bool has_isp = (isp->component != NULL);
+    const bool wants_isp = want_isp(vd);
+
+    if (has_isp == wants_isp)
+    {
+        // All OK - do nothing
+    }
+    else if (has_isp)
+    {
+        // ISP active but we don't want it
+        isp_flush(isp);
+
+        // Check we have everything back and then kill it
+        if (mmal_queue_length(isp->out_pool->queue) == isp->output->buffer_num)
+            isp_close(vd, vd_sys);
+    }
     else
-        return x + y - mod;
+    {
+        // ISP closed but we want it
+        return isp_setup(vd, vd_sys);
+    }
+
+    return MMAL_SUCCESS;
+}
+
+/* TV service */
+static void tvservice_cb(void *callback_data, uint32_t reason, uint32_t param1,
+                uint32_t param2);
+static void adjust_refresh_rate(vout_display_t *vd, const video_format_t *fmt);
+static int set_latency_target(vout_display_t *vd, bool enable);
+
+// Mmal
+static void maintain_phase_sync(vout_display_t *vd);
+
+
+
+static void vd_input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+{
+#if TRACE_ALL
+    vout_display_t * const vd = (vout_display_t *)port->userdata;
+    pic_ctx_mmal_t * ctx = buf->user_data;
+    msg_Dbg(vd, "<<< %s: cmd=%d, ctx=%p, buf=%p, flags=%#x, pts=%lld", __func__, buf->cmd, ctx, buf,
+            buf->flags, (long long)buf->pts);
+#else
+    VLC_UNUSED(port);
+#endif
+
+    mmal_buffer_header_release(buf);
+
+#if TRACE_ALL
+    msg_Dbg(vd, ">>> %s", __func__);
+#endif
+}
+
+static int query_resolution(vout_display_t *vd, const int display_id, unsigned *width, unsigned *height)
+{
+    TV_DISPLAY_STATE_T display_state = {0};
+    int ret = 0;
+
+    if (vc_tv_get_display_state_id(display_id, &display_state) == 0) {
+        msg_Dbg(vd, "State=%#x", display_state.state);
+        if (display_state.state & 0xFF) {
+            msg_Dbg(vd, "HDMI: %dx%d", display_state.display.hdmi.width, display_state.display.hdmi.height);
+            *width = display_state.display.hdmi.width;
+            *height = display_state.display.hdmi.height;
+        } else if (display_state.state & 0xFF00) {
+            msg_Dbg(vd, "SDTV: %dx%d", display_state.display.sdtv.width, display_state.display.sdtv.height);
+            *width = display_state.display.sdtv.width;
+            *height = display_state.display.sdtv.height;
+        } else {
+            msg_Warn(vd, "Invalid display state %"PRIx32, display_state.state);
+            ret = -1;
+        }
+    } else {
+        msg_Warn(vd, "Failed to query display resolution");
+        ret = -1;
+    }
+
+    return ret;
+}
+
+static inline MMAL_RECT_T
+place_to_mmal_rect(const vout_display_place_t place)
+{
+    return (MMAL_RECT_T){
+        .x      = place.x,
+        .y      = place.y,
+        .width  = place.width,
+        .height = place.height
+    };
 }
 
+static void
+place_dest(vout_display_t *vd, vout_display_sys_t * const sys,
+           const vout_display_cfg_t * const cfg, const video_format_t * fmt)
+{
+    video_format_t tfmt;
+
+    // Fix SAR if unknown
+    if (fmt->i_sar_den == 0 || fmt->i_sar_num == 0) {
+        tfmt = *fmt;
+        tfmt.i_sar_den = 1;
+        tfmt.i_sar_num = 1;
+        fmt = &tfmt;
+    }
+
+    // Ignore what VLC thinks might be going on with display size
+    vout_display_cfg_t tcfg = *cfg;
+    vout_display_place_t place;
+    tcfg.display.width = sys->display_width;
+    tcfg.display.height = sys->display_height;
+    tcfg.is_display_filled = true;
+    vout_display_PlacePicture(&place, fmt, &tcfg, false);
+
+    sys->dest_rect = place_to_mmal_rect(place);
+#if TRACE_ALL
+    msg_Dbg(vd, "%s: %dx%d -> %dx%d @ %d,%d", __func__,
+            tcfg.display.width, tcfg.display.height,
+            place.width, place.height, place.x, place.y);
+#endif
+}
+
+
+
 static int configure_display(vout_display_t *vd, const vout_display_cfg_t *cfg,
                 const video_format_t *fmt)
 {
-    vout_display_sys_t *sys = vd->sys;
-    vout_display_place_t place;
+    vout_display_sys_t * const sys = vd->sys;
     MMAL_DISPLAYREGION_T display_region;
     MMAL_STATUS_T status;
 
     if (!cfg && !fmt)
+    {
+        msg_Err(vd, "%s: Missing cfg & fmt", __func__);
         return -EINVAL;
+    }
+
+    isp_check(vd, sys);
 
     if (fmt) {
         sys->input->format->es->video.par.num = fmt->i_sar_num;
@@ -412,22 +610,17 @@
     if (!cfg)
         cfg = vd->cfg;
 
-    vout_display_PlacePicture(&place, fmt, cfg, false);
+    place_dest(vd, sys, cfg, fmt);
 
     display_region.hdr.id = MMAL_PARAMETER_DISPLAYREGION;
     display_region.hdr.size = sizeof(MMAL_DISPLAYREGION_T);
     display_region.fullscreen = MMAL_FALSE;
-    display_region.src_rect.x = fmt->i_x_offset;
-    display_region.src_rect.y = fmt->i_y_offset;
-    display_region.src_rect.width = fmt->i_visible_width;
-    display_region.src_rect.height = fmt->i_visible_height;
-    display_region.dest_rect.x = place.x;
-    display_region.dest_rect.y = place.y;
-    display_region.dest_rect.width = place.width;
-    display_region.dest_rect.height = place.height;
+    display_src_rect(vd, &display_region.src_rect);
+    display_region.dest_rect = sys->dest_rect;
     display_region.layer = sys->layer;
+    display_region.alpha = 0xff | (1 << 29);
     display_region.set = MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_SRC_RECT |
-            MMAL_DISPLAY_SET_DEST_RECT | MMAL_DISPLAY_SET_LAYER;
+            MMAL_DISPLAY_SET_DEST_RECT | MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_ALPHA;
     status = mmal_port_parameter_set(sys->input, &display_region.hdr);
     if (status != MMAL_SUCCESS) {
         msg_Err(vd, "Failed to set display region (status=%"PRIx32" %s)",
@@ -435,7 +628,6 @@
         return -EINVAL;
     }
 
-    show_background(vd, var_InheritBool(vd, MMAL_BLANK_BACKGROUND_NAME));
     sys->adjust_refresh_rate = var_InheritBool(vd, MMAL_ADJUST_REFRESHRATE_NAME);
     sys->native_interlaced = var_InheritBool(vd, MMAL_NATIVE_INTERLACED);
     if (sys->adjust_refresh_rate) {
@@ -446,192 +638,161 @@
     return 0;
 }
 
-static picture_pool_t *vd_pool(vout_display_t *vd, unsigned count)
+static void kill_pool(vout_display_sys_t * const sys)
 {
-    vout_display_sys_t *sys = vd->sys;
-    picture_resource_t picture_res;
-    picture_pool_configuration_t picture_pool_cfg;
-    video_format_t fmt = vd->fmt;
-    MMAL_STATUS_T status;
-    unsigned i;
-
-    if (sys->picture_pool) {
-        if (sys->num_buffers < count)
-            msg_Warn(vd, "Picture pool with %u pictures requested, but we already have one with %u pictures",
-                            count, sys->num_buffers);
-
-        goto out;
+    if (sys->pic_pool != NULL) {
+        picture_pool_Release(sys->pic_pool);
+        sys->pic_pool = NULL;
     }
+}
 
-    if (sys->opaque) {
-        if (count <= NUM_ACTUAL_OPAQUE_BUFFERS)
-            count = NUM_ACTUAL_OPAQUE_BUFFERS;
+// Actual picture pool for MMAL opaques is just a set of trivial containers
+static picture_pool_t *vd_pool(vout_display_t *vd, unsigned count)
+{
+    vout_display_sys_t * const sys = vd->sys;
 
-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
-            1
-        };
+    msg_Dbg(vd, "%s: fmt:%dx%d,sar:%d/%d; source:%dx%d", __func__,
+            vd->fmt.i_width, vd->fmt.i_height, vd->fmt.i_sar_num, vd->fmt.i_sar_den, vd->source.i_width, vd->source.i_height);
 
-        status = mmal_port_parameter_set(sys->input, &zero_copy.hdr);
-        if (status != MMAL_SUCCESS) {
-           msg_Err(vd, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
-                    sys->input->name, status, mmal_status_to_string(status));
-           goto out;
-        }
+    if (sys->pic_pool == NULL) {
+        sys->pic_pool = picture_pool_NewFromFormat(&vd->fmt, count);
     }
+    return sys->pic_pool;
+}
 
-    if (count < sys->input->buffer_num_recommended)
-        count = sys->input->buffer_num_recommended;
+static void vd_display(vout_display_t *vd, picture_t *p_pic,
+                subpicture_t *subpicture)
+{
+    vout_display_sys_t * const sys = vd->sys;
+    MMAL_STATUS_T err;
 
-#ifndef NDEBUG
-    msg_Dbg(vd, "Creating picture pool with %u pictures", count);
+#if TRACE_ALL
+    {
+        char dbuf0[5];
+        msg_Dbg(vd, "<<< %s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d", __func__,
+                str_fourcc(dbuf0, p_pic->format.i_chroma), p_pic->format.i_width, p_pic->format.i_height,
+                p_pic->format.i_x_offset, p_pic->format.i_y_offset,
+                p_pic->format.i_visible_width, p_pic->format.i_visible_height,
+                p_pic->format.i_sar_num, p_pic->format.i_sar_den);
+    }
 #endif
 
-    sys->input->buffer_num = count;
-    status = mmal_port_enable(sys->input, input_port_cb);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(vd, "Failed to enable input port %s (status=%"PRIx32" %s)",
-                        sys->input->name, status, mmal_status_to_string(status));
-        goto out;
+    // If we had subpics then we have attached them to the main pic in prepare
+    // so all we have to do here is delete the refs
+    if (subpicture != NULL) {
+        subpicture_Delete(subpicture);
     }
 
-    status = mmal_component_enable(sys->component);
-    if (status != MMAL_SUCCESS) {
-        msg_Err(vd, "Failed to enable component %s (status=%"PRIx32" %s)",
-                        sys->component->name, status, mmal_status_to_string(status));
-        goto out;
+    if (!sys->input->is_enabled &&
+        (err = mmal_port_enable(sys->input, vd_input_port_cb)) != MMAL_SUCCESS)
+    {
+        msg_Err(vd, "Input port enable failed");
+        goto fail;
+    }
+    // Stuff into input
+    // We assume the BH is already set up with values reflecting pic date etc.
+    if (sys->copy_buf != NULL) {
+        MMAL_BUFFER_HEADER_T *const buf = sys->copy_buf;
+        sys->copy_buf = NULL;
+#if TRACE_ALL
+        msg_Dbg(vd, "--- %s: Copy stuff", __func__);
+#endif
+        if (mmal_port_send_buffer(sys->input, buf) != MMAL_SUCCESS)
+        {
+            mmal_buffer_header_release(buf);
+            msg_Err(vd, "Send copy buffer to render input failed");
+            goto fail;
+        }
     }
-
-    sys->num_buffers = count;
-    sys->pool = mmal_port_pool_create(sys->input, sys->num_buffers,
-            sys->input->buffer_size);
-    if (!sys->pool) {
-        msg_Err(vd, "Failed to create MMAL pool for %u buffers of size %"PRIu32,
-                        count, sys->input->buffer_size);
-        goto out;
+    else if (sys->isp.pending) {
+        MMAL_BUFFER_HEADER_T *const buf = mmal_queue_wait(sys->isp.out_q);
+        sys->isp.pending = false;
+#if TRACE_ALL
+        msg_Dbg(vd, "--- %s: ISP stuff", __func__);
+#endif
+        if (mmal_port_send_buffer(sys->input, buf) != MMAL_SUCCESS)
+        {
+            mmal_buffer_header_release(buf);
+            msg_Err(vd, "Send ISP buffer to render input failed");
+            goto fail;
+        }
     }
-
-    memset(&picture_res, 0, sizeof(picture_resource_t));
-    sys->pictures = calloc(sys->num_buffers, sizeof(picture_t *));
-    for (i = 0; i < sys->num_buffers; ++i) {
-        picture_res.p_sys = calloc(1, sizeof(picture_sys_t));
-        picture_res.p_sys->owner = (vlc_object_t *)vd;
-        picture_res.p_sys->buffer = mmal_queue_get(sys->pool->queue);
-
-        sys->pictures[i] = picture_NewFromResource(&fmt, &picture_res);
-        if (!sys->pictures[i]) {
-            msg_Err(vd, "Failed to create picture");
-            free(picture_res.p_sys);
-            goto out;
+    else
+    {
+        MMAL_BUFFER_HEADER_T *const pic_buf = hw_mmal_pic_buf_replicated(p_pic, sys->pool);
+        if (pic_buf == NULL)
+        {
+            msg_Err(vd, "Replicated buffer get fail");
+            goto fail;
         }
 
-        sys->pictures[i]->i_planes = sys->i_planes;
-        memcpy(sys->pictures[i]->p, sys->planes, sys->i_planes * sizeof(plane_t));
-    }
 
-    memset(&picture_pool_cfg, 0, sizeof(picture_pool_configuration_t));
-    picture_pool_cfg.picture_count = sys->num_buffers;
-    picture_pool_cfg.picture = sys->pictures;
-    picture_pool_cfg.lock = mmal_picture_lock;
+        // If dimensions have chnaged then fix that
+        if (hw_mmal_vlc_pic_to_mmal_fmt_update(sys->input->format, p_pic))
+        {
+            msg_Dbg(vd, "Reset port format");
+
+            // HVS can deal with on-line dimension changes
+            if (mmal_port_format_commit(sys->input) != MMAL_SUCCESS)
+                msg_Warn(vd, "Input format commit failed");
+        }
 
-    sys->picture_pool = picture_pool_NewExtended(&picture_pool_cfg);
-    if (!sys->picture_pool) {
-        msg_Err(vd, "Failed to create picture pool");
-        goto out;
+        if ((err = mmal_port_send_buffer(sys->input, pic_buf)) != MMAL_SUCCESS)
+        {
+            mmal_buffer_header_release(pic_buf);
+            msg_Err(vd, "Send buffer to input failed");
+            goto fail;
+        }
     }
 
-out:
-    return sys->picture_pool;
-}
-
-static void vd_prepare(vout_display_t *vd, picture_t *picture,
-                subpicture_t *subpicture)
-{
-    vout_display_sys_t *sys = vd->sys;
-    picture_sys_t *pic_sys = picture->p_sys;
-
-    if (!sys->adjust_refresh_rate || pic_sys->displayed)
-        return;
-
-    /* Apply the required phase_offset to the picture, so that vd_display()
-     * will be called at the corrected time from the core */
-    picture->date += sys->phase_offset;
-}
-
-static void vd_display(vout_display_t *vd, picture_t *picture,
-                subpicture_t *subpicture)
-{
-    vout_display_sys_t *sys = vd->sys;
-    picture_sys_t *pic_sys = picture->p_sys;
-    MMAL_BUFFER_HEADER_T *buffer = pic_sys->buffer;
-    MMAL_STATUS_T status;
-
-    if (picture->format.i_frame_rate != sys->i_frame_rate ||
-        picture->format.i_frame_rate_base != sys->i_frame_rate_base ||
-        picture->b_progressive != sys->b_progressive ||
-        picture->b_top_field_first != sys->b_top_field_first) {
-        sys->b_top_field_first = picture->b_top_field_first;
-        sys->b_progressive = picture->b_progressive;
-        sys->i_frame_rate = picture->format.i_frame_rate;
-        sys->i_frame_rate_base = picture->format.i_frame_rate_base;
-        configure_display(vd, NULL, &picture->format);
-    }
-
-    if (!pic_sys->displayed || !sys->opaque) {
-        buffer->cmd = 0;
-        buffer->length = sys->input->buffer_size;
-        buffer->user_data = picture;
-
-        status = mmal_port_send_buffer(sys->input, buffer);
-        if (status == MMAL_SUCCESS)
-            atomic_fetch_add(&sys->buffers_in_transit, 1);
-
-        if (status != MMAL_SUCCESS) {
-            msg_Err(vd, "Failed to send buffer to input port. Frame dropped");
-            picture_Release(picture);
+    {
+        unsigned int sub_no = 0;
+        MMAL_BUFFER_HEADER_T **psub_bufs2 = sys->subpic_bufs;
+        const bool is_mmal_pic = hw_mmal_pic_is_mmal(p_pic);
+
+        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
+            int rv;
+            MMAL_BUFFER_HEADER_T * const sub_buf = !is_mmal_pic ? NULL :
+                hw_mmal_pic_sub_buf_get(p_pic, sub_no);
+
+            if ((rv = hw_mmal_subpic_update(VLC_OBJECT(vd),
+                                            sub_buf != NULL ? sub_buf : *psub_bufs2++,
+                                            &sys->subs[sub_no].sub,
+                                            &p_pic->format,
+                                            &sys->dest_rect,
+                                            p_pic->date)) == 0)
+                break;
+            else if (rv < 0)
+                goto fail;
         }
-
-        pic_sys->displayed = true;
-    } else {
-        picture_Release(picture);
     }
 
-    display_subpicture(vd, subpicture);
+fail:
+    for (unsigned int i = 0; i != SUBS_MAX && sys->subpic_bufs[i] != NULL; ++i) {
+        mmal_buffer_header_release(sys->subpic_bufs[i]);
+        sys->subpic_bufs[i] = NULL;
+    }
 
-    if (subpicture)
-        subpicture_Delete(subpicture);
+    picture_Release(p_pic);
 
     if (sys->next_phase_check == 0 && sys->adjust_refresh_rate)
         maintain_phase_sync(vd);
     sys->next_phase_check = (sys->next_phase_check + 1) % PHASE_CHECK_INTERVAL;
-
-    if (sys->opaque) {
-        vlc_mutex_lock(&sys->buffer_mutex);
-        while (atomic_load(&sys->buffers_in_transit) >= MAX_BUFFERS_IN_TRANSIT)
-            vlc_cond_wait(&sys->buffer_cond, &sys->buffer_mutex);
-        vlc_mutex_unlock(&sys->buffer_mutex);
-    }
 }
 
 static int vd_control(vout_display_t *vd, int query, va_list args)
 {
-    vout_display_sys_t *sys = vd->sys;
-    vout_display_cfg_t cfg;
-    const vout_display_cfg_t *tmp_cfg;
+    vout_display_sys_t * const sys = vd->sys;
     int ret = VLC_EGENERIC;
+    VLC_UNUSED(args);
 
     switch (query) {
         case VOUT_DISPLAY_CHANGE_DISPLAY_SIZE:
-            tmp_cfg = va_arg(args, const vout_display_cfg_t *);
-            if (tmp_cfg->display.width == sys->display_width &&
-                            tmp_cfg->display.height == sys->display_height) {
-                cfg = *vd->cfg;
-                cfg.display.width = sys->display_width;
-                cfg.display.height = sys->display_height;
-                if (configure_display(vd, &cfg, NULL) >= 0)
-                    ret = VLC_SUCCESS;
-            }
+        {
+            // Ignore this - we just use full screen anyway
+            ret = VLC_SUCCESS;
             break;
+        }
 
         case VOUT_DISPLAY_CHANGE_SOURCE_ASPECT:
         case VOUT_DISPLAY_CHANGE_SOURCE_CROP:
@@ -640,11 +801,39 @@
             break;
 
         case VOUT_DISPLAY_RESET_PICTURES:
-            vlc_assert_unreachable();
+            msg_Warn(vd, "Reset Pictures");
+            kill_pool(sys);
+            vd->fmt = vd->source; // Take (nearly) whatever source wants to give us
+            vd->fmt.i_chroma = req_chroma(vd);  // Adjust chroma to something we can actaully deal with
+            ret = VLC_SUCCESS;
+            break;
+
         case VOUT_DISPLAY_CHANGE_ZOOM:
             msg_Warn(vd, "Unsupported control query %d", query);
+            ret = VLC_SUCCESS;
             break;
 
+        case VOUT_DISPLAY_CHANGE_MMAL_HIDE:
+        {
+            MMAL_STATUS_T err;
+            unsigned int i;
+
+            msg_Dbg(vd, "Hide display");
+
+            for (i = 0; i != SUBS_MAX; ++i)
+                hw_mmal_subpic_flush(VLC_OBJECT(vd), &sys->subs[i].sub);
+
+            if (sys->input->is_enabled &&
+                (err = mmal_port_disable(sys->input)) != MMAL_SUCCESS)
+            {
+                msg_Err(vd, "Unable to disable port: err=%d", err);
+                break;
+            }
+            sys->force_config = true;
+            ret = VLC_SUCCESS;
+            break;
+        }
+
         default:
             msg_Warn(vd, "Unknown control query %d", query);
             break;
@@ -661,13 +850,11 @@
     vlc_mutex_lock(&sys->manage_mutex);
 
     if (sys->need_configure_display) {
-        close_dmx(vd);
-        sys->dmx_handle = vc_dispmanx_display_open(0);
-
-        if (query_resolution(vd, &width, &height) >= 0) {
+        if (query_resolution(vd, sys->display_id, &width, &height) >= 0) {
             sys->display_width = width;
             sys->display_height = height;
-            vout_display_SendEventDisplaySize(vd, width, height);
+//            msg_Dbg(vd, "%s: %dx%d", __func__, width, height);
+//            vout_window_ReportSize(vd->cfg->window, width, height);
         }
 
         sys->need_configure_display = false;
@@ -676,56 +863,171 @@
     vlc_mutex_unlock(&sys->manage_mutex);
 }
 
-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+
+static int attach_subpics(vout_display_t * const vd, vout_display_sys_t * const sys,
+                          subpicture_t * const subpicture)
 {
-    vout_display_t *vd = (vout_display_t *)port->userdata;
-    MMAL_STATUS_T status;
+    unsigned int n = 0;
 
-    if (buffer->cmd == MMAL_EVENT_ERROR) {
-        status = *(uint32_t *)buffer->data;
-        msg_Err(vd, "MMAL error %"PRIx32" \"%s\"", status, mmal_status_to_string(status));
+    if (sys->vzc == NULL) {
+        if ((sys->vzc = hw_mmal_vzc_pool_new()) == NULL)
+        {
+            msg_Err(vd, "Failed to allocate VZC");
+            return VLC_ENOMEM;
+        }
     }
 
-    mmal_buffer_header_release(buffer);
+    // Attempt to import the subpics
+    for (subpicture_t * spic = subpicture; spic != NULL; spic = spic->p_next)
+    {
+        for (subpicture_region_t *sreg = spic->p_region; sreg != NULL; sreg = sreg->p_next) {
+            picture_t *const src = sreg->p_picture;
+
+#if 0
+            char dbuf0[5];
+            msg_Dbg(vd, "  [%p:%p] Pos=%d,%d src=%dx%d/%dx%d,  vd->fmt=%dx%d/%dx%d, vd->source=%dx%d/%dx%d, cfg=%dx%d, Alpha=%d, Fmt=%s", src, src->p[0].p_pixels,
+                    sreg->i_x, sreg->i_y,
+                    src->format.i_visible_width, src->format.i_visible_height,
+                    src->format.i_width, src->format.i_height,
+                    vd->fmt.i_visible_width, vd->fmt.i_visible_height,
+                    vd->fmt.i_width, vd->fmt.i_height,
+                    vd->source.i_visible_width, vd->source.i_visible_height,
+                    vd->source.i_width, vd->source.i_height,
+                    vd->cfg->display.width, vd->cfg->display.height,
+                    sreg->i_alpha,
+                    str_fourcc(dbuf0, src->format.i_chroma));
+#endif
+
+            // At this point I think the subtitles are being placed in the
+            // coord space of the cfg rectangle
+            if ((sys->subpic_bufs[n] = hw_mmal_vzc_buf_from_pic(sys->vzc,
+                src,
+                (MMAL_RECT_T){.width = vd->cfg->display.width, .height=vd->cfg->display.height},
+                sreg->i_x, sreg->i_y,
+                sreg->i_alpha,
+                n == 0)) == NULL)
+            {
+                msg_Err(vd, "Failed to allocate vzc buffer for subpic");
+                return VLC_ENOMEM;
+            }
+
+            if (++n == SUBS_MAX)
+                return VLC_SUCCESS;
+        }
+    }
+    return VLC_SUCCESS;
 }
 
-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+
+static void vd_prepare(vout_display_t *vd, picture_t *p_pic,
+#if VLC_VER_3
+                       subpicture_t *subpicture
+#else
+                       subpicture_t *subpicture, vlc_tick_t date
+#endif
+                       )
 {
-    vout_display_t *vd = (vout_display_t *)port->userdata;
+    MMAL_STATUS_T err;
+    vout_display_sys_t * const sys = vd->sys;
+
+    vd_manage(vd);
+
+    if (sys->force_config ||
+        p_pic->format.i_frame_rate != sys->i_frame_rate ||
+        p_pic->format.i_frame_rate_base != sys->i_frame_rate_base ||
+        p_pic->b_progressive != sys->b_progressive ||
+        p_pic->b_top_field_first != sys->b_top_field_first)
+    {
+        sys->force_config = false;
+        sys->b_top_field_first = p_pic->b_top_field_first;
+        sys->b_progressive = p_pic->b_progressive;
+        sys->i_frame_rate = p_pic->format.i_frame_rate;
+        sys->i_frame_rate_base = p_pic->format.i_frame_rate_base;
+        configure_display(vd, NULL, &p_pic->format);
+    }
+
+    // Subpics can either turn up attached to the main pic or in the
+    // subpic list here  - if they turn up here then process into temp
+    // buffers
+    if (subpicture != NULL) {
+        attach_subpics(vd, sys, subpicture);
+    }
+
+    // *****
+    if (want_copy(vd)) {
+        if (sys->copy_buf != NULL) {
+            msg_Err(vd, "Copy buf not NULL");
+            mmal_buffer_header_release(sys->copy_buf);
+            sys->copy_buf = NULL;
+        }
+
+        MMAL_BUFFER_HEADER_T * const buf = mmal_queue_wait(sys->copy_pool->queue);
+        // Copy 2d
+        hw_mmal_copy_pic_to_buf(buf->data, &buf->length, sys->input->format, p_pic);
+        buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
+
+        sys->copy_buf = buf;
+    }
+
+    if (isp_check(vd, sys) != MMAL_SUCCESS) {
+        return;
+    }
+
+    if (want_isp(vd))
+    {
+        struct vout_isp_conf_s * const isp = &sys->isp;
+        MMAL_BUFFER_HEADER_T * buf;
+
+        // This should be empty - make it so if it isn't
+        isp_empty_out_q(isp);
+        isp->pending = false;
+
+        // Stuff output
+        if (isp_prepare(vd, isp) != MMAL_SUCCESS)
+            return;
+
+        if ((buf = hw_mmal_pic_buf_replicated(p_pic, isp->in_pool)) == NULL)
+        {
+            msg_Err(vd, "Pic has no attached buffer");
+            return;
+        }
+
+        if ((err = mmal_port_send_buffer(isp->input, buf)) != MMAL_SUCCESS)
+        {
+            msg_Err(vd, "Send buffer to input failed");
+            mmal_buffer_header_release(buf);
+            return;
+        }
+
+        isp->pending = true;
+    }
+
+#if 0
+    VLC_UNUSED(date);
     vout_display_sys_t *sys = vd->sys;
-    picture_t *picture = (picture_t *)buffer->user_data;
+    picture_sys_t *pic_sys = picture->p_sys;
 
-    if (picture)
-        picture_Release(picture);
+    if (!sys->adjust_refresh_rate || pic_sys->displayed)
+        return;
 
-    vlc_mutex_lock(&sys->buffer_mutex);
-    atomic_fetch_sub(&sys->buffers_in_transit, 1);
-    vlc_cond_signal(&sys->buffer_cond);
-    vlc_mutex_unlock(&sys->buffer_mutex);
+    /* Apply the required phase_offset to the picture, so that vd_display()
+     * will be called at the corrected time from the core */
+    picture->date += sys->phase_offset;
+#endif
 }
 
-static int query_resolution(vout_display_t *vd, unsigned *width, unsigned *height)
+
+static void vd_control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
 {
-    TV_DISPLAY_STATE_T display_state;
-    int ret = 0;
+    vout_display_t *vd = (vout_display_t *)port->userdata;
+    MMAL_STATUS_T status;
 
-    if (vc_tv_get_display_state(&display_state) == 0) {
-        if (display_state.state & 0xFF) {
-            *width = display_state.display.hdmi.width;
-            *height = display_state.display.hdmi.height;
-        } else if (display_state.state & 0xFF00) {
-            *width = display_state.display.sdtv.width;
-            *height = display_state.display.sdtv.height;
-        } else {
-            msg_Warn(vd, "Invalid display state %"PRIx32, display_state.state);
-            ret = -1;
-        }
-    } else {
-        msg_Warn(vd, "Failed to query display resolution");
-        ret = -1;
+    if (buffer->cmd == MMAL_EVENT_ERROR) {
+        status = *(uint32_t *)buffer->data;
+        msg_Err(vd, "MMAL error %"PRIx32" \"%s\"", status, mmal_status_to_string(status));
     }
 
-    return ret;
+    mmal_buffer_header_release(buffer);
 }
 
 static void tvservice_cb(void *callback_data, uint32_t reason, uint32_t param1, uint32_t param2)
@@ -780,9 +1082,9 @@
     double best_score, score;
     int i;
 
-    vc_tv_get_display_state(&display_state);
+    vc_tv_get_display_state_id(sys->display_id, &display_state);
     if(display_state.display.hdmi.mode != HDMI_MODE_OFF) {
-        num_modes = vc_tv_hdmi_get_supported_modes_new(display_state.display.hdmi.group,
+        num_modes = vc_tv_hdmi_get_supported_modes_new_id(sys->display_id, display_state.display.hdmi.group,
                         supported_modes, VC_TV_MAX_MODE_IDS, NULL, NULL);
 
         for (i = 0; i < num_modes; ++i) {
@@ -810,7 +1112,7 @@
         if((best_id >= 0) && (display_state.display.hdmi.mode != supported_modes[best_id].code)) {
             msg_Info(vd, "Setting HDMI refresh rate to %"PRIu32,
                             supported_modes[best_id].frame_rate);
-            vc_tv_hdmi_power_on_explicit_new(HDMI_MODE_HDMI,
+            vc_tv_hdmi_power_on_explicit_new_id(sys->display_id, HDMI_MODE_HDMI,
                             supported_modes[best_id].group,
                             supported_modes[best_id].code);
         }
@@ -828,148 +1130,12 @@
     }
 }
 
-static void display_subpicture(vout_display_t *vd, subpicture_t *subpicture)
-{
-    vout_display_sys_t *sys = vd->sys;
-    struct dmx_region_t **dmx_region = &sys->dmx_region;
-    struct dmx_region_t *unused_dmx_region;
-    DISPMANX_UPDATE_HANDLE_T update = 0;
-    picture_t *picture;
-    video_format_t *fmt;
-    struct dmx_region_t *dmx_region_next;
-
-    if(subpicture) {
-        subpicture_region_t *region = subpicture->p_region;
-        while(region) {
-            picture = region->p_picture;
-            fmt = &region->fmt;
-
-            if(!*dmx_region) {
-                if(!update)
-                    update = vc_dispmanx_update_start(10);
-                *dmx_region = dmx_region_new(vd, update, region);
-            } else if(((*dmx_region)->bmp_rect.width != (int32_t)fmt->i_visible_width) ||
-                    ((*dmx_region)->bmp_rect.height != (int32_t)fmt->i_visible_height) ||
-                    ((*dmx_region)->pos_x != region->i_x) ||
-                    ((*dmx_region)->pos_y != region->i_y) ||
-                    ((*dmx_region)->alpha.opacity != (uint32_t)region->i_alpha)) {
-                dmx_region_next = (*dmx_region)->next;
-                if(!update)
-                    update = vc_dispmanx_update_start(10);
-                dmx_region_delete(*dmx_region, update);
-                *dmx_region = dmx_region_new(vd, update, region);
-                (*dmx_region)->next = dmx_region_next;
-            } else if((*dmx_region)->picture != picture) {
-                if(!update)
-                    update = vc_dispmanx_update_start(10);
-                dmx_region_update(*dmx_region, update, picture);
-            }
-
-            dmx_region = &(*dmx_region)->next;
-            region = region->p_next;
-        }
-    }
-
-    /* Remove remaining regions */
-    unused_dmx_region = *dmx_region;
-    while(unused_dmx_region) {
-        dmx_region_next = unused_dmx_region->next;
-        if(!update)
-            update = vc_dispmanx_update_start(10);
-        dmx_region_delete(unused_dmx_region, update);
-        unused_dmx_region = dmx_region_next;
-    }
-    *dmx_region = NULL;
-
-    if(update)
-        vc_dispmanx_update_submit_sync(update);
-}
-
-static void close_dmx(vout_display_t *vd)
-{
-    vout_display_sys_t *sys = vd->sys;
-    DISPMANX_UPDATE_HANDLE_T update = vc_dispmanx_update_start(10);
-    struct dmx_region_t *dmx_region = sys->dmx_region;
-    struct dmx_region_t *dmx_region_next;
-
-    while(dmx_region) {
-        dmx_region_next = dmx_region->next;
-        dmx_region_delete(dmx_region, update);
-        dmx_region = dmx_region_next;
-    }
-
-    vc_dispmanx_update_submit_sync(update);
-    sys->dmx_region = NULL;
-
-    show_background(vd, false);
-
-    vc_dispmanx_display_close(sys->dmx_handle);
-    sys->dmx_handle = DISPMANX_NO_HANDLE;
-}
-
-static struct dmx_region_t *dmx_region_new(vout_display_t *vd,
-                DISPMANX_UPDATE_HANDLE_T update, subpicture_region_t *region)
-{
-    vout_display_sys_t *sys = vd->sys;
-    video_format_t *fmt = &region->fmt;
-    struct dmx_region_t *dmx_region = malloc(sizeof(struct dmx_region_t));
-    uint32_t image_handle;
-
-    dmx_region->pos_x = region->i_x;
-    dmx_region->pos_y = region->i_y;
-
-    vc_dispmanx_rect_set(&dmx_region->bmp_rect, 0, 0, fmt->i_visible_width,
-                    fmt->i_visible_height);
-    vc_dispmanx_rect_set(&dmx_region->src_rect, 0, 0, fmt->i_visible_width << 16,
-                    fmt->i_visible_height << 16);
-    vc_dispmanx_rect_set(&dmx_region->dst_rect, region->i_x, region->i_y,
-                    fmt->i_visible_width, fmt->i_visible_height);
-
-    dmx_region->resource = vc_dispmanx_resource_create(VC_IMAGE_RGBA32,
-                    dmx_region->bmp_rect.width | (region->p_picture->p[0].i_pitch << 16),
-                    dmx_region->bmp_rect.height | (dmx_region->bmp_rect.height << 16),
-                    &image_handle);
-    vc_dispmanx_resource_write_data(dmx_region->resource, VC_IMAGE_RGBA32,
-                    region->p_picture->p[0].i_pitch,
-                    region->p_picture->p[0].p_pixels, &dmx_region->bmp_rect);
-
-    dmx_region->alpha.flags = DISPMANX_FLAGS_ALPHA_FROM_SOURCE | DISPMANX_FLAGS_ALPHA_MIX;
-    dmx_region->alpha.opacity = region->i_alpha;
-    dmx_region->alpha.mask = DISPMANX_NO_HANDLE;
-    dmx_region->element = vc_dispmanx_element_add(update, sys->dmx_handle,
-                    sys->layer + 1, &dmx_region->dst_rect, dmx_region->resource,
-                    &dmx_region->src_rect, DISPMANX_PROTECTION_NONE,
-                    &dmx_region->alpha, NULL, VC_IMAGE_ROT0);
-
-    dmx_region->next = NULL;
-    dmx_region->picture = region->p_picture;
-
-    return dmx_region;
-}
-
-static void dmx_region_update(struct dmx_region_t *dmx_region,
-                DISPMANX_UPDATE_HANDLE_T update, picture_t *picture)
-{
-    vc_dispmanx_resource_write_data(dmx_region->resource, VC_IMAGE_RGBA32,
-                    picture->p[0].i_pitch, picture->p[0].p_pixels, &dmx_region->bmp_rect);
-    vc_dispmanx_element_change_source(update, dmx_region->element, dmx_region->resource);
-    dmx_region->picture = picture;
-}
-
-static void dmx_region_delete(struct dmx_region_t *dmx_region,
-                DISPMANX_UPDATE_HANDLE_T update)
-{
-    vc_dispmanx_element_remove(update, dmx_region->element);
-    vc_dispmanx_resource_delete(dmx_region->resource);
-    free(dmx_region);
-}
-
 static void maintain_phase_sync(vout_display_t *vd)
 {
     MMAL_PARAMETER_VIDEO_RENDER_STATS_T render_stats = {
         .hdr = { MMAL_PARAMETER_VIDEO_RENDER_STATS, sizeof(render_stats) },
     };
-    int32_t frame_duration = 1000000 /
+    int32_t frame_duration = CLOCK_FREQ /
         ((double)vd->sys->i_frame_rate /
         vd->sys->i_frame_rate_base);
     vout_display_sys_t *sys = vd->sys;
@@ -1012,32 +1178,317 @@
     }
 }
 
-static void show_background(vout_display_t *vd, bool enable)
+static void CloseMmalVout(vlc_object_t *object)
 {
-    vout_display_sys_t *sys = vd->sys;
-    uint32_t image_ptr, color = 0xFF000000;
-    VC_RECT_T dst_rect, src_rect;
-    DISPMANX_UPDATE_HANDLE_T update;
-
-    if (enable && !sys->bkg_element) {
-        sys->bkg_resource = vc_dispmanx_resource_create(VC_IMAGE_RGBA32, 1, 1,
-                        &image_ptr);
-        vc_dispmanx_rect_set(&dst_rect, 0, 0, 1, 1);
-        vc_dispmanx_resource_write_data(sys->bkg_resource, VC_IMAGE_RGBA32,
-                        sizeof(color), &color, &dst_rect);
-        vc_dispmanx_rect_set(&src_rect, 0, 0, 1 << 16, 1 << 16);
-        vc_dispmanx_rect_set(&dst_rect, 0, 0, 0, 0);
-        update = vc_dispmanx_update_start(0);
-        sys->bkg_element = vc_dispmanx_element_add(update, sys->dmx_handle,
-                        sys->layer - 1, &dst_rect, sys->bkg_resource, &src_rect,
-                        DISPMANX_PROTECTION_NONE, NULL, NULL, VC_IMAGE_ROT0);
-        vc_dispmanx_update_submit_sync(update);
-    } else if (!enable && sys->bkg_element) {
-        update = vc_dispmanx_update_start(0);
-        vc_dispmanx_element_remove(update, sys->bkg_element);
-        vc_dispmanx_resource_delete(sys->bkg_resource);
-        vc_dispmanx_update_submit_sync(update);
-        sys->bkg_element = DISPMANX_NO_HANDLE;
-        sys->bkg_resource = DISPMANX_NO_HANDLE;
+    vout_display_t * const vd = (vout_display_t *)object;
+    vout_display_sys_t * const sys = vd->sys;
+    char response[20]; /* answer is hvs_update_fields=%1d */
+
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s", __func__);
+#endif
+
+    kill_pool(sys);
+
+    vc_tv_unregister_callback_full(tvservice_cb, vd);
+
+    // Shouldn't be anything here - but just in case
+    for (unsigned int i = 0; i != SUBS_MAX; ++i)
+        if (sys->subpic_bufs[i] != NULL)
+            mmal_buffer_header_release(sys->subpic_bufs[i]);
+
+    for (unsigned int i = 0; i != SUBS_MAX; ++i) {
+        vout_subpic_t * const sub = sys->subs + i;
+        if (sub->component != NULL) {
+            hw_mmal_subpic_close(VLC_OBJECT(vd), &sub->sub);
+            if (sub->component->control->is_enabled)
+                mmal_port_disable(sub->component->control);
+            if (sub->component->is_enabled)
+                mmal_component_disable(sub->component);
+            mmal_component_release(sub->component);
+            sub->component = NULL;
+        }
     }
+
+    if (sys->input && sys->input->is_enabled)
+        mmal_port_disable(sys->input);
+
+    if (sys->component && sys->component->control->is_enabled)
+        mmal_port_disable(sys->component->control);
+
+    if (sys->copy_buf != NULL)
+        mmal_buffer_header_release(sys->copy_buf);
+
+    if (sys->input != NULL && sys->copy_pool != NULL)
+        mmal_port_pool_destroy(sys->input, sys->copy_pool);
+
+    if (sys->component && sys->component->is_enabled)
+        mmal_component_disable(sys->component);
+
+    if (sys->pool)
+        mmal_pool_destroy(sys->pool);
+
+    if (sys->component)
+        mmal_component_release(sys->component);
+
+    isp_close(vd, sys);
+
+    hw_mmal_vzc_pool_release(sys->vzc);
+
+    vlc_mutex_destroy(&sys->manage_mutex);
+
+    if (sys->native_interlaced) {
+        if (vc_gencmd(response, sizeof(response), "hvs_update_fields 0") < 0 ||
+                response[18] != '0')
+            msg_Warn(vd, "Could not reset hvs field mode");
+    }
+
+    cma_vcsm_exit(sys->init_type);;
+
+    free(sys);
+
+#if TRACE_ALL
+    msg_Dbg(vd, ">>> %s", __func__);
+#endif
+}
+
+
+static const struct {
+    const char * name;
+    int num;
+} display_name_to_num[] = {
+    {"auto",    -1},
+    {"hdmi-1",  DISPMANX_ID_HDMI0},
+    {"hdmi-2",  DISPMANX_ID_HDMI1},
+    {NULL,      -2}
+};
+
+static int find_display_num(const char * name)
+{
+    unsigned int i;
+    for (i = 0; display_name_to_num[i].name != NULL && strcasecmp(display_name_to_num[i].name, name) != 0; ++i)
+        /* Loop */;
+    return display_name_to_num[i].num;
+}
+
+static int OpenMmalVout(vlc_object_t *object)
+{
+    vout_display_t *vd = (vout_display_t *)object;
+    vout_display_sys_t *sys;
+    MMAL_DISPLAYREGION_T display_region;
+    MMAL_STATUS_T status;
+    int ret = VLC_EGENERIC;
+    // At the moment all copy is via I420
+    const bool needs_copy = !hw_mmal_chroma_is_mmal(vd->fmt.i_chroma);
+    const MMAL_FOURCC_T enc_in = needs_copy ? MMAL_ENCODING_I420 :
+        vout_vlc_to_mmal_pic_fourcc(vd->fmt.i_chroma);
+
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s", __func__);
+#endif
+
+    sys = calloc(1, sizeof(struct vout_display_sys_t));
+    if (!sys)
+        return VLC_ENOMEM;
+    vd->sys = sys;
+
+    vlc_mutex_init(&sys->manage_mutex);
+
+    if ((sys->init_type = cma_vcsm_init()) == VCSM_INIT_NONE)
+    {
+        msg_Err(vd, "VCSM init fail");
+        goto fail;
+    }
+
+    vc_tv_register_callback(tvservice_cb, vd);
+
+    sys->layer = var_InheritInteger(vd, MMAL_LAYER_NAME);
+
+    {
+        const char *display_name = var_InheritString(vd, MMAL_DISPLAY_NAME);
+        int qt_num = var_InheritInteger(vd, "qt-fullscreen-screennumber" );
+        int display_id = find_display_num(display_name);
+//        sys->display_id = display_id < 0 ? vc_tv_get_default_display_id() : display_id;
+        sys->display_id = display_id >= 0 ? display_id :
+            qt_num == 1 ? DISPMANX_ID_HDMI1 : DISPMANX_ID_HDMI;
+        if (display_id < -1)
+            msg_Warn(vd, "Unknown display device: '%s'", display_name);
+        else
+            msg_Dbg(vd, "Display device: %s, qt=%d id=%d display=%d", display_name,
+                    qt_num, display_id, sys->display_id);
+    }
+
+    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &sys->component);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
+                        MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
+    status = mmal_port_enable(sys->component->control, vd_control_port_cb);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to enable control port %s (status=%"PRIx32" %s)",
+                        sys->component->control->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    sys->input = sys->component->input[0];
+    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
+
+    sys->input->format->encoding = enc_in;
+    sys->input->format->encoding_variant = 0;
+    sys->i_planes = 1;
+
+    display_set_format(vd, sys->input->format, want_isp(vd));
+
+    status = port_parameter_set_bool(sys->input, MMAL_PARAMETER_ZERO_COPY, true);
+    if (status != MMAL_SUCCESS) {
+       msg_Err(vd, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
+                sys->input->name, status, mmal_status_to_string(status));
+       goto fail;
+    }
+
+    status = mmal_port_format_commit(sys->input);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
+                        sys->input->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    sys->input->buffer_size = sys->input->buffer_size_recommended;
+
+    if (!needs_copy) {
+        sys->input->buffer_num = 30;
+    }
+    else {
+        sys->input->buffer_num = 2;
+        if ((sys->copy_pool = mmal_port_pool_create(sys->input, 2, sys->input->buffer_size)) == NULL)
+        {
+            msg_Err(vd, "Cannot create copy pool");
+            goto fail;
+        }
+    }
+
+    if (query_resolution(vd, sys->display_id, &sys->display_width, &sys->display_height) < 0)
+    {
+        sys->display_width = vd->cfg->display.width;
+        sys->display_height = vd->cfg->display.height;
+    }
+
+    place_dest(vd, sys, vd->cfg, &vd->source);  // Sets sys->dest_rect
+
+    display_region.hdr.id = MMAL_PARAMETER_DISPLAYREGION;
+    display_region.hdr.size = sizeof(MMAL_DISPLAYREGION_T);
+    display_region.display_num = sys->display_id;
+    display_region.fullscreen = MMAL_FALSE;
+    display_src_rect(vd, &display_region.src_rect);
+    display_region.dest_rect = sys->dest_rect;
+    display_region.layer = sys->layer;
+    display_region.set =
+        MMAL_DISPLAY_SET_NUM |
+        MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_SRC_RECT |
+        MMAL_DISPLAY_SET_DEST_RECT | MMAL_DISPLAY_SET_LAYER;
+    status = mmal_port_parameter_set(sys->input, &display_region.hdr);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to set display region (status=%"PRIx32" %s)",
+                        status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    status = mmal_port_enable(sys->input, vd_input_port_cb);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to enable input port %s (status=%"PRIx32" %s)",
+                sys->input->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    status = mmal_component_enable(sys->component);
+    if (status != MMAL_SUCCESS) {
+        msg_Err(vd, "Failed to enable component %s (status=%"PRIx32" %s)",
+                sys->component->name, status, mmal_status_to_string(status));
+        goto fail;
+    }
+
+    if ((sys->pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)
+    {
+        msg_Err(vd, "Failed to create input pool");
+        goto fail;
+    }
+
+    for (unsigned int i = 0; i != SUBS_MAX; ++i) {
+        vout_subpic_t * const sub = sys->subs + i;
+        if ((status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &sub->component)) != MMAL_SUCCESS)
+        {
+            msg_Dbg(vd, "Failed to create subpic component %d", i);
+            goto fail;
+        }
+        sub->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
+        if ((status = mmal_port_enable(sub->component->control, vd_control_port_cb)) != MMAL_SUCCESS) {
+            msg_Err(vd, "Failed to enable control port %s on sub %d (status=%"PRIx32" %s)",
+                            sys->component->control->name, i, status, mmal_status_to_string(status));
+            goto fail;
+        }
+        if ((status = hw_mmal_subpic_open(VLC_OBJECT(vd), &sub->sub, sub->component->input[0],
+                                          sys->display_id, sys->layer + i + 1)) != MMAL_SUCCESS) {
+            msg_Dbg(vd, "Failed to open subpic %d", i);
+            goto fail;
+        }
+        if ((status = mmal_component_enable(sub->component)) != MMAL_SUCCESS)
+        {
+            msg_Dbg(vd, "Failed to enable subpic component %d", i);
+            goto fail;
+        }
+    }
+
+    // If we can't deal with it directly ask for I420
+    vd->fmt.i_chroma = req_chroma(vd);
+
+    vd->info = (vout_display_info_t){
+        .is_slow = false,
+        .has_double_click = false,
+        .needs_hide_mouse = false,
+        .has_pictures_invalid = true,
+        .subpicture_chromas = hw_mmal_vzc_subpicture_chromas
+    };
+
+    vd->pool = vd_pool;
+    vd->prepare = vd_prepare;
+    vd->display = vd_display;
+    vd->control = vd_control;
+
+
+    msg_Dbg(vd, ">>> %s: ok", __func__);
+    return VLC_SUCCESS;
+
+fail:
+    CloseMmalVout(object);
+
+    msg_Dbg(vd, ">>> %s: rv=%d", __func__, ret);
+
+    return ret == VLC_SUCCESS ? VLC_EGENERIC : ret;
 }
+
+vlc_module_begin()
+
+    add_submodule()
+
+    set_shortname(N_("MMAL vout"))
+    set_description(N_("MMAL-based vout plugin for Raspberry Pi"))
+    set_capability("vout display", 16)  // 1 point better than ASCII art
+    add_shortcut("mmal_vout")
+    set_category( CAT_VIDEO )
+    set_subcategory( SUBCAT_VIDEO_VOUT )
+
+    add_integer(MMAL_LAYER_NAME, 1, MMAL_LAYER_TEXT, MMAL_LAYER_LONGTEXT, false)
+    add_bool(MMAL_ADJUST_REFRESHRATE_NAME, false, MMAL_ADJUST_REFRESHRATE_TEXT,
+                    MMAL_ADJUST_REFRESHRATE_LONGTEXT, false)
+    add_bool(MMAL_NATIVE_INTERLACED, false, MMAL_NATIVE_INTERLACE_TEXT,
+                    MMAL_NATIVE_INTERLACE_LONGTEXT, false)
+    add_string(MMAL_DISPLAY_NAME, "auto", MMAL_DISPLAY_TEXT,
+                    MMAL_DISPLAY_LONGTEXT, false)
+    set_callbacks(OpenMmalVout, CloseMmalVout)
+
+vlc_module_end()
+
+
--- /dev/null
+++ b/modules/hw/mmal/xsplitter.c
@@ -0,0 +1,560 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdatomic.h>
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_threads.h>
+#include <vlc_vout_display.h>
+#include <vlc_modules.h>
+
+#include <bcm_host.h>
+#include <interface/mmal/mmal.h>
+#include <interface/mmal/util/mmal_util.h>
+#include <interface/mmal/util/mmal_default_components.h>
+
+#include "mmal_picture.h"
+
+#define TRACE_ALL 0
+
+typedef struct display_desc_s
+{
+    vout_display_t * vout;
+    unsigned int max_pels;
+} display_desc_t;
+
+typedef struct mmal_x11_sys_s
+{
+    bool use_mmal;
+    display_desc_t * cur_desc;
+    display_desc_t mmal_desc;
+    display_desc_t x_desc;
+    uint32_t changed;
+    vlc_fourcc_t subpicture_chromas[16];
+} mmal_x11_sys_t;
+
+#define MAX_GL_PELS (1920*1080)
+#define MAX_MMAL_PELS (4096*4096)  // Should never be hit
+
+#if 0
+// Gen prog for the following table
+// Not done inline in case we end up pulling in FP libs we don't want
+#include <math.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[])
+{
+    unsigned int i;
+    for (i = 0; i != 64; ++i)
+    {
+        printf(" [%2u]=%5u,", i, (unsigned int)(0.5 + (1/sqrt((i + 5)/4.0) * 65536.0)));
+        if (i % 4 == 3)
+            printf("\n");
+    }
+}
+#endif
+
+static const uint16_t sqrt_tab[64] = {
+    [ 0]=58617, [ 1]=53510, [ 2]=49541, [ 3]=46341,
+    [ 4]=43691, [ 5]=41449, [ 6]=39520, [ 7]=37837,
+    [ 8]=36353, [ 9]=35030, [10]=33843, [11]=32768,
+    [12]=31790, [13]=30894, [14]=30070, [15]=29309,
+    [16]=28602, [17]=27945, [18]=27330, [19]=26755,
+    [20]=26214, [21]=25705, [22]=25225, [23]=24770,
+    [24]=24339, [25]=23930, [26]=23541, [27]=23170,
+    [28]=22817, [29]=22479, [30]=22155, [31]=21845,
+    [32]=21548, [33]=21263, [34]=20988, [35]=20724,
+    [36]=20470, [37]=20225, [38]=19988, [39]=19760,
+    [40]=19539, [41]=19326, [42]=19119, [43]=18919,
+    [44]=18725, [45]=18536, [46]=18354, [47]=18176,
+    [48]=18004, [49]=17837, [50]=17674, [51]=17515,
+    [52]=17361, [53]=17211, [54]=17064, [55]=16921,
+    [56]=16782, [57]=16646, [58]=16514, [59]=16384,
+    [60]=16257, [61]=16134, [62]=16013, [63]=15895
+};
+#define SQRT_MAX (sizeof(sqrt_tab)/sizeof(sqrt_tab[0]) - 1)
+
+static bool cpy_fmt_limit_size(const display_desc_t * const dd,
+                           video_format_t * const dst,
+                           const video_format_t * const src)
+{
+    const unsigned int src_pel = src->i_visible_width * src->i_visible_height;
+
+    *dst = *src;
+
+    if (src_pel <= dd->max_pels)
+        return false;
+
+    // scaling factor sqrt(max_pel/cur_pel)
+    // sqrt done by lookup & 16 bit fixed-point maths - not exactly accurate but
+    // easily good enough & avoids floating point (which may be slow)
+    // src_pel > max_pel so n >= 0
+    // Rounding should be such that exact sqrts work and everything else rounds
+    // down
+    unsigned int n = ((src_pel * 4 - 1) / dd->max_pels) - 4;
+    unsigned int scale = sqrt_tab[n >= SQRT_MAX ? SQRT_MAX : n];
+
+    // Rescale width - rounding up to 16
+    unsigned int width = ((src->i_visible_width * scale + (16 << 16) - 1) >> 16) & ~15;
+    // Rescale height based on new width
+    unsigned int height = (src->i_visible_height * width + src->i_visible_width/2) / src->i_visible_width;
+
+//    fprintf(stderr, "%dx%d -> %dx%d\n", src->i_visible_width, src->i_visible_height, width, height);
+
+    dst->i_width          = width;
+    dst->i_visible_width  = width;
+    dst->i_height         = height;
+    dst->i_visible_height = height;
+    return true;
+}
+
+static void unload_display_module(vout_display_t * const x_vout)
+{
+    if (x_vout != NULL) {
+       if (x_vout->module != NULL) {
+            module_unneed(x_vout, x_vout->module);
+        }
+        vlc_object_release(x_vout);
+    }
+}
+
+static void CloseMmalX11(vlc_object_t *object)
+{
+    vout_display_t * const vd = (vout_display_t *)object;
+    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
+
+    msg_Dbg(vd, "<<< %s", __func__);
+
+    if (sys == NULL)
+        return;
+
+    unload_display_module(sys->x_desc.vout);
+
+    unload_display_module(sys->mmal_desc.vout);
+
+    free(sys);
+
+    msg_Dbg(vd, ">>> %s", __func__);
+}
+
+static void mmal_x11_event(vout_display_t * x_vd, int cmd, va_list args)
+{
+    vout_display_t * const vd = x_vd->owner.sys;
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s (cmd=%d)", __func__, cmd);
+#endif
+
+    // Do not fall into the display assert if Invalid not supported
+    if (cmd == VOUT_DISPLAY_EVENT_PICTURES_INVALID &&
+            !vd->info.has_pictures_invalid)
+        return;
+
+    vd->owner.event(vd, cmd, args);
+}
+
+static vout_window_t * mmal_x11_window_new(vout_display_t * x_vd, unsigned type)
+{
+    vout_display_t * const vd = x_vd->owner.sys;
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s (type=%d)", __func__, type);
+#endif
+    return vd->owner.window_new(vd, type);
+}
+
+static void mmal_x11_window_del(vout_display_t * x_vd, vout_window_t * win)
+{
+    vout_display_t * const vd = x_vd->owner.sys;
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s", __func__);
+#endif
+    vd->owner.window_del(vd, win);
+}
+
+
+static int load_display_module(vout_display_t * const vd,
+                                display_desc_t * const dd,
+                                const char * const cap,
+                                const char * const module_name)
+{
+    vout_display_t * const x_vout = vlc_object_create(vd, sizeof(*x_vout));
+
+    dd->vout = NULL;
+    if (!x_vout)
+        return -1;
+
+    x_vout->owner.sys = vd;
+    x_vout->owner.event = mmal_x11_event;
+    x_vout->owner.window_new = mmal_x11_window_new;
+    x_vout->owner.window_del = mmal_x11_window_del;
+
+    x_vout->cfg    = vd->cfg;
+    x_vout->info   = vd->info;
+    cpy_fmt_limit_size(dd, &x_vout->source, &vd->source);
+    cpy_fmt_limit_size(dd, &x_vout->fmt,    &vd->fmt);
+
+    if ((x_vout->module = module_need(x_vout, cap, module_name, true)) == NULL)
+    {
+        msg_Err(vd, "Failed to open Xsplitter:%s module", module_name);
+        goto fail;
+    }
+
+    msg_Dbg(vd, "R/G/B: %08x/%08x/%08x", x_vout->fmt.i_rmask, x_vout->fmt.i_gmask, x_vout->fmt.i_bmask);
+
+    dd->vout = x_vout;
+    return 0;
+
+fail:
+    vlc_object_release(x_vout);
+    return -1;
+}
+
+
+/* Return a pointer over the current picture_pool_t* (mandatory).
+ *
+ * For performance reasons, it is best to provide at least count
+ * pictures but it is not mandatory.
+ * You can return NULL when you cannot/do not want to allocate
+ * pictures.
+ * The vout display module keeps the ownership of the pool and can
+ * destroy it only when closing or on invalid pictures control.
+ */
+static picture_pool_t * mmal_x11_pool(vout_display_t * vd, unsigned count)
+{
+    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
+    vout_display_t * const x_vd = sys->cur_desc->vout;
+#if TRACE_ALL
+    char buf0[5];
+    msg_Dbg(vd, "<<< %s (count=%d) %s:%dx%d->%s:%dx%d", __func__, count,
+            str_fourcc(buf0, vd->fmt.i_chroma),
+            vd->fmt.i_width, vd->fmt.i_height,
+            str_fourcc(buf0, x_vd->fmt.i_chroma),
+            x_vd->fmt.i_width, x_vd->fmt.i_height);
+#endif
+    picture_pool_t * pool = x_vd->pool(x_vd, count);
+#if TRACE_ALL
+    msg_Dbg(vd, ">>> %s: %p", __func__, pool);
+#endif
+    return pool;
+}
+
+/* Prepare a picture and an optional subpicture for display (optional).
+ *
+ * It is called before the next pf_display call to provide as much
+ * time as possible to prepare the given picture and the subpicture
+ * for display.
+ * You are guaranted that pf_display will always be called and using
+ * the exact same picture_t and subpicture_t.
+ * You cannot change the pixel content of the picture_t or of the
+ * subpicture_t.
+ */
+static void mmal_x11_prepare(vout_display_t * vd, picture_t * pic, subpicture_t * sub)
+{
+    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
+    vout_display_t * const x_vd = sys->cur_desc->vout;
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s", __func__);
+#endif
+    if (x_vd->prepare)
+        x_vd->prepare(x_vd, pic, sub);
+}
+
+/* Display a picture and an optional subpicture (mandatory).
+ *
+ * The picture and the optional subpicture must be displayed as soon as
+ * possible.
+ * You cannot change the pixel content of the picture_t or of the
+ * subpicture_t.
+ *
+ * This function gives away the ownership of the picture and of the
+ * subpicture, so you must release them as soon as possible.
+ */
+static void mmal_x11_display(vout_display_t * vd, picture_t * pic, subpicture_t * sub)
+{
+    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
+    vout_display_t * const x_vd = sys->cur_desc->vout;
+
+#if TRACE_ALL
+    const bool is_mmal_pic = hw_mmal_pic_is_mmal(pic);
+    msg_Dbg(vd, "<<< %s: fmt: %dx%d/%dx%d, pic:%dx%d, pts=%lld, mmal=%d/%d", __func__, vd->fmt.i_width, vd->fmt.i_height, x_vd->fmt.i_width, x_vd->fmt.i_height, pic->format.i_width, pic->format.i_height, (long long)pic->date,
+            is_mmal_pic, sys->use_mmal);
+#endif
+
+    if (x_vd->fmt.i_chroma != pic->format.i_chroma ||
+        x_vd->fmt.i_width  != pic->format.i_width ||
+        x_vd->fmt.i_height != pic->format.i_height)
+    {
+        msg_Dbg(vd, "%s: Picture dropped", __func__);
+        picture_Release(pic);
+        if (sub != NULL)
+            subpicture_Delete(sub);
+        return;
+    }
+
+    x_vd->display(x_vd, pic, sub);
+}
+
+
+static int vout_display_Control(display_desc_t * const dd, int query, ...)
+{
+    va_list args;
+    int result;
+
+    va_start(args, query);
+    result = dd->vout->control(dd->vout, query, args);
+    va_end(args);
+
+    return result;
+}
+
+static bool want_mmal_vout(vout_display_t * const vd, const mmal_x11_sys_t * const sys)
+{
+    return sys->mmal_desc.vout != NULL &&
+        (sys->x_desc.vout == NULL || var_InheritBool(vd, "fullscreen"));
+}
+
+/* Control on the module (mandatory) */
+static int mmal_x11_control(vout_display_t * vd, int ctl, va_list va)
+{
+    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
+    display_desc_t *x_desc = sys->cur_desc;
+    int rv;
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s[%d] (ctl=%d)", __func__, sys->use_mmal, ctl);
+#endif
+    // Remember what we've told this vd - unwanted ctls ignored on replay
+    if (ctl >= 0 && ctl <= 31)
+        sys->changed |= (1 << ctl);
+
+    switch (ctl) {
+        case VOUT_DISPLAY_CHANGE_DISPLAY_SIZE:
+        {
+            const vout_display_cfg_t * const cfg = va_arg(va, const vout_display_cfg_t *);
+            const bool want_mmal = want_mmal_vout(vd, sys);
+            const bool swap_vout = (sys->use_mmal != want_mmal);
+            display_desc_t * const new_desc = want_mmal ? &sys->mmal_desc : &sys->x_desc;
+
+            msg_Dbg(vd, "Change size: %d, %d: mmal_vout=%p, want_mmal=%d, fs=%d",
+                    cfg->display.width, cfg->display.height, sys->mmal_desc.vout, want_mmal,
+                    var_InheritBool(vd, "fullscreen"));
+
+            if (swap_vout) {
+                if (sys->use_mmal) {
+                    vout_display_Control(x_desc, VOUT_DISPLAY_CHANGE_MMAL_HIDE);
+                }
+                vout_display_SendEventPicturesInvalid(vd);
+            }
+
+            rv = vout_display_Control(new_desc, ctl, cfg);
+            if (rv == VLC_SUCCESS) {
+                vd->fmt       = new_desc->vout->fmt;
+                sys->cur_desc = new_desc;
+                sys->use_mmal = want_mmal;
+            }
+
+            // Repeat any control calls that we sent to the previous vd
+            if (swap_vout && sys->changed != 0) {
+                const uint32_t changed = sys->changed;
+                sys->changed = 0;
+                if ((changed & (1 << VOUT_DISPLAY_CHANGE_DISPLAY_FILLED)) != 0)
+                    vout_display_Control(new_desc, VOUT_DISPLAY_CHANGE_DISPLAY_FILLED, vd->cfg);
+                if ((changed & (1 << VOUT_DISPLAY_CHANGE_ZOOM)) != 0)
+                    vout_display_Control(new_desc, VOUT_DISPLAY_CHANGE_ZOOM, vd->cfg);
+                if ((changed & ((1 << VOUT_DISPLAY_CHANGE_SOURCE_CROP) |
+                                (1 << VOUT_DISPLAY_CHANGE_SOURCE_ASPECT))) != 0)
+                    cpy_fmt_limit_size(new_desc, &new_desc->vout->source, &vd->source);
+                if ((changed & (1 << VOUT_DISPLAY_CHANGE_SOURCE_ASPECT)) != 0)
+                    vout_display_Control(new_desc, VOUT_DISPLAY_CHANGE_SOURCE_ASPECT);
+                if ((changed & (1 << VOUT_DISPLAY_CHANGE_SOURCE_CROP)) != 0)
+                    vout_display_Control(new_desc, VOUT_DISPLAY_CHANGE_SOURCE_CROP);
+                if ((changed & (1 << VOUT_DISPLAY_CHANGE_VIEWPOINT)) != 0)
+                    vout_display_Control(new_desc, VOUT_DISPLAY_CHANGE_VIEWPOINT, vd->cfg);
+            }
+
+            break;
+        }
+
+        case VOUT_DISPLAY_RESET_PICTURES:
+            {
+                char dbuf0[5], dbuf1[5], dbuf2[5];
+                msg_Dbg(vd, "<<< %s: Pic reset: fmt: %s,%dx%d<-%s,%dx%d, source: %s,%dx%d/%dx%d", __func__,
+                        str_fourcc(dbuf0, vd->fmt.i_chroma), vd->fmt.i_width, vd->fmt.i_height,
+                        str_fourcc(dbuf1, x_desc->vout->fmt.i_chroma), x_desc->vout->fmt.i_width, x_desc->vout->fmt.i_height,
+                        str_fourcc(dbuf2, vd->source.i_chroma), vd->source.i_width, vd->source.i_height, x_desc->vout->source.i_width,
+                        x_desc->vout->source.i_height);
+            }
+            // If the display doesn't have has_pictures_invalid then it doesn't
+            // expect RESET_PICTURES
+            if (sys->x_desc.vout->info.has_pictures_invalid) {
+                rv = sys->x_desc.vout->control(sys->x_desc.vout, ctl, va);
+            }
+            if (sys->mmal_desc.vout && sys->mmal_desc.vout->info.has_pictures_invalid) {
+                rv = sys->mmal_desc.vout->control(sys->mmal_desc.vout, ctl, va);
+            }
+            vd->fmt = x_desc->vout->fmt;
+            break;
+
+        case VOUT_DISPLAY_CHANGE_SOURCE_ASPECT:
+        case VOUT_DISPLAY_CHANGE_SOURCE_CROP:
+            cpy_fmt_limit_size(x_desc, &x_desc->vout->source, &vd->source);
+
+            /* FALLTHRU */
+        default:
+            rv = x_desc->vout->control(x_desc->vout, ctl, va);
+//            vd->fmt  = x_vd->fmt;
+            break;
+    }
+#if TRACE_ALL
+    msg_Dbg(vd, ">>> %s (rv=%d)", __func__, rv);
+#endif
+    return rv;
+}
+
+#define DO_MANAGE 0
+
+#if DO_MANAGE
+/* Manage pending event (optional) */
+static void mmal_x11_manage(vout_display_t * vd)
+{
+    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
+    vout_display_t * const x_vd = sys->cur_desc->vout;
+#if TRACE_ALL
+    msg_Dbg(vd, "<<< %s", __func__);
+#endif
+    x_vd->manage(x_vd);
+}
+#endif
+
+static int OpenMmalX11(vlc_object_t *object)
+{
+    vout_display_t * const vd = (vout_display_t *)object;
+    mmal_x11_sys_t * const sys = calloc(1, sizeof(*sys));
+    int ret = VLC_SUCCESS;
+
+    if (sys == NULL) {
+        return VLC_EGENERIC;
+    }
+    vd->sys = (vout_display_sys_t *)sys;
+
+    vd->info = (vout_display_info_t){
+        .is_slow = false,
+        .has_double_click = false,
+        .needs_hide_mouse = false,
+        .has_pictures_invalid = true,
+        .subpicture_chromas = NULL
+    };
+
+    {
+        char dbuf0[5];
+        msg_Dbg(vd, ">>> %s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d", __func__,
+                str_fourcc(dbuf0, vd->fmt.i_chroma),
+                vd->fmt.i_width,         vd->fmt.i_height,
+                vd->fmt.i_x_offset,      vd->fmt.i_y_offset,
+                vd->fmt.i_visible_width, vd->fmt.i_visible_height,
+                vd->fmt.i_sar_num,       vd->fmt.i_sar_den);
+    }
+
+    sys->x_desc.max_pels = MAX_GL_PELS;
+    sys->mmal_desc.max_pels = MAX_MMAL_PELS;
+
+    if (load_display_module(vd, &sys->x_desc, "vout display", "opengles2") == 0)
+    {
+        msg_Dbg(vd, "Opengles2 output found");
+    }
+    else
+    {
+        sys->x_desc.max_pels = MAX_MMAL_PELS;
+        if (load_display_module(vd, &sys->x_desc, "vout display", "xcb_x11") == 0)
+            msg_Dbg(vd, "X11 XCB output found");
+    }
+
+    if ((load_display_module(vd, &sys->mmal_desc, "vout display", "mmal_vout")) == 0)
+        msg_Dbg(vd, "MMAL output found");
+
+    if (sys->mmal_desc.vout == NULL && sys->x_desc.vout == NULL) {
+        char dbuf0[5], dbuf1[5];
+        msg_Info(vd, "No valid output found for vout (%s/%s)", str_fourcc(dbuf0, vd->fmt.i_chroma), str_fourcc(dbuf1, vd->source.i_chroma));
+        goto fail;
+    }
+
+    vd->pool = mmal_x11_pool;
+    vd->prepare = mmal_x11_prepare;
+    vd->display = mmal_x11_display;
+    vd->control = mmal_x11_control;
+#if DO_MANAGE
+    vd->manage = mmal_x11_manage;
+#endif
+
+    if (want_mmal_vout(vd, sys)) {
+        sys->cur_desc = &sys->mmal_desc;
+        sys->use_mmal = true;
+    }
+    else {
+        sys->cur_desc = &sys->x_desc;
+        sys->use_mmal = false;
+    }
+
+    if (sys->mmal_desc.vout == NULL || sys->x_desc.vout == NULL) {
+        vd->info = sys->cur_desc->vout->info;
+        vd->info.has_pictures_invalid = true;  // Should make this unwanted
+    }
+    else {
+        // We have both - construct a combination
+        vd->info = (vout_display_info_t){
+            .is_slow              = false,
+            .has_double_click     = sys->mmal_desc.vout->info.has_double_click || sys->x_desc.vout->info.has_double_click,
+            .needs_hide_mouse     = sys->mmal_desc.vout->info.needs_hide_mouse || sys->x_desc.vout->info.needs_hide_mouse,
+            .has_pictures_invalid = true,
+        };
+        // Construct intersection of subpicture chromas
+        // sys calloced so no need to add the terminating zero
+        if (sys->mmal_desc.vout->info.subpicture_chromas != NULL && sys->x_desc.vout->info.subpicture_chromas != NULL) {
+            unsigned int n = 0;
+            // N^2 - fix if we ever care
+            for (const vlc_fourcc_t * p1 = sys->mmal_desc.vout->info.subpicture_chromas; *p1 != 0 && n != 15; ++p1) {
+                for (const vlc_fourcc_t * p2 = sys->x_desc.vout->info.subpicture_chromas; *p2 != 0; ++p2) {
+                    if (*p1 == *p2) {
+                        sys->subpicture_chromas[n++] = *p1;
+                        break;
+                    }
+                }
+            }
+            if (n != 0)
+                vd->info.subpicture_chromas = sys->subpicture_chromas;
+        }
+    }
+    vd->fmt  = sys->cur_desc->vout->fmt;
+
+#if TRACE_ALL
+    {
+        char dbuf0[5];
+        msg_Dbg(vd, ">>> %s: (%s) %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d", __func__,
+                module_get_name(sys->cur_desc->vout->module, false),
+                str_fourcc(dbuf0, vd->fmt.i_chroma),
+                vd->fmt.i_width,         vd->fmt.i_height,
+                vd->fmt.i_x_offset,      vd->fmt.i_y_offset,
+                vd->fmt.i_visible_width, vd->fmt.i_visible_height,
+                vd->fmt.i_sar_num,       vd->fmt.i_sar_den);
+    }
+#endif
+    return VLC_SUCCESS;
+
+fail:
+    CloseMmalX11(VLC_OBJECT(vd));
+    return ret == VLC_SUCCESS ? VLC_EGENERIC : ret;
+}
+
+
+
+
+vlc_module_begin()
+    set_shortname(N_("MMAL x11 splitter"))
+    set_description(N_("MMAL x11 splitter for Raspberry Pi"))
+    set_capability("vout display", 300)  // Between GLES & GL
+    add_shortcut("mmal_x11")
+    set_category( CAT_VIDEO )
+    set_subcategory( SUBCAT_VIDEO_VOUT )
+    set_callbacks(OpenMmalX11, CloseMmalX11)
+vlc_module_end()
+
--- a/modules/video_output/opengl/egl.c
+++ b/modules/video_output/opengl/egl.c
@@ -43,6 +43,8 @@
 # include "../android/utils.h"
 #endif
 
+#define REQUIRE_DMA_BUF_IMPORT 1
+
 typedef struct vlc_gl_sys_t
 {
     EGLDisplay display;
@@ -354,6 +356,14 @@
         goto error;
     }
 
+#if REQUIRE_DMA_BUF_IMPORT
+    if (!CheckToken(ext, "EGL_EXT_image_dma_buf_import"))
+    {
+        msg_Dbg(obj, "No dma_buf_import - fall back to X");
+        goto error;
+    }
+#endif
+
     const EGLint conf_attr[] = {
         EGL_RED_SIZE, 5,
         EGL_GREEN_SIZE, 5,
--- a/src/input/decoder.c
+++ b/src/input/decoder.c
@@ -1995,6 +1995,7 @@
     vlc_mutex_lock( &p_owner->lock );
     p_owner->b_waiting = false;
     vlc_cond_signal( &p_owner->wait_request );
+    vlc_mutex_unlock( &p_owner->lock );
 
     /* If the video output is paused or slow, or if the picture pool size was
      * under-estimated (e.g. greedy video filter, buggy decoder...), the
@@ -2005,7 +2006,6 @@
      * worker threads (if any) and the decoder thread to terminate. */
     if( p_owner->p_vout != NULL )
         vout_Cancel( p_owner->p_vout, true );
-    vlc_mutex_unlock( &p_owner->lock );
 
     vlc_join( p_owner->thread, NULL );
 
--- a/src/misc/fourcc.c
+++ b/src/misc/fourcc.c
@@ -755,8 +755,13 @@
     { { VLC_CODEC_VDPAU_VIDEO_420, VLC_CODEC_VDPAU_VIDEO_422,
         VLC_CODEC_VDPAU_VIDEO_444, VLC_CODEC_VDPAU_OUTPUT },
                                                FAKE_FMT() },
-    { { VLC_CODEC_ANDROID_OPAQUE, VLC_CODEC_MMAL_OPAQUE,
-        VLC_CODEC_D3D9_OPAQUE,    VLC_CODEC_D3D11_OPAQUE },
+    { { VLC_CODEC_ANDROID_OPAQUE },            FAKE_FMT() },
+    { { VLC_CODEC_MMAL_OPAQUE, VLC_CODEC_MMAL_ZC_SAND30   },
+                                               FAKE_FMT() },
+    { { VLC_CODEC_MMAL_ZC_I420,   VLC_CODEC_MMAL_ZC_SAND8,
+        VLC_CODEC_MMAL_ZC_SAND10, VLC_CODEC_MMAL_ZC_RGB32 },
+                                               FAKE_FMT() },
+    { { VLC_CODEC_D3D9_OPAQUE,    VLC_CODEC_D3D11_OPAQUE },
                                                FAKE_FMT() },
     { { VLC_CODEC_D3D11_OPAQUE_10B, VLC_CODEC_D3D9_OPAQUE_10B },
                                                FAKE_FMT() },
--- a/src/misc/picture.c
+++ b/src/misc/picture.c
@@ -365,10 +365,30 @@
     p_dst->b_top_field_first = p_src->b_top_field_first;
 }
 
+static inline bool is_zc_chroma(const vlc_fourcc_t i_chroma)
+{
+    return i_chroma == VLC_CODEC_MMAL_OPAQUE ||
+        i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_RGB32 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_SAND8;
+}
+
 void picture_CopyPixels( picture_t *p_dst, const picture_t *p_src )
 {
-    for( int i = 0; i < p_src->i_planes ; i++ )
-        plane_CopyPixels( p_dst->p+i, p_src->p+i );
+    if( is_zc_chroma(p_src->format.i_chroma) )
+    {
+        assert(p_dst->i_planes == 0);
+        p_dst->i_planes = p_src->i_planes;
+        for( int i = 0; i < p_src->i_planes; i++ )
+            p_dst->p[i] = p_src->p[i];
+    }
+    else
+    {
+        for( int i = 0; i < p_src->i_planes; i++ )
+            plane_CopyPixels( p_dst->p+i, p_src->p+i );
+    }
 
     assert( p_dst->context == NULL );
 
--- a/src/video_output/video_output.c
+++ b/src/video_output/video_output.c
@@ -964,6 +964,17 @@
     return NULL;
 }
 
+
+static inline bool is_zc_chroma(const vlc_fourcc_t i_chroma)
+{
+    return i_chroma == VLC_CODEC_MMAL_OPAQUE ||
+        i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_RGB32 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
+        i_chroma == VLC_CODEC_MMAL_ZC_SAND8;
+}
+
 static int ThreadDisplayRenderPicture(vout_thread_t *vout, bool is_forced)
 {
     vout_thread_sys_t *sys = vout->p;
@@ -1098,7 +1109,7 @@
     }
 
     assert(vout_IsDisplayFiltered(vd) == !sys->display.use_dr);
-    if (sys->display.use_dr && !is_direct) {
+    if (sys->display.use_dr && !is_direct && !is_zc_chroma(todisplay->format.i_chroma)) {
         picture_t *direct = NULL;
         if (likely(vout->p->display_pool != NULL))
             direct = picture_pool_Get(vout->p->display_pool);