tQcEewj9

· 8 years ago · Jun 03, 2017, 11:50 AM
1From aa1a8435000228c4d9e74da0f9fd3d16e85a3e80 Mon Sep 17 00:00:00 2001
2From: Loren Merritt <pengvado@akuvian.org>
3Date: Sat, 26 Jun 2010 20:55:59 -0700
4Subject: [PATCH 1/7] Simplify pixel_ads
5
6---
7 common/macroblock.c    |    2 +-
8 common/x86/pixel-a.asm |  175 +++++++++++++++++------------------------------
9 encoder/me.c           |    2 +-
10 3 files changed, 65 insertions(+), 114 deletions(-)
11
12diff --git a/common/macroblock.c b/common/macroblock.c
13index 8e9b06d..4561d8a 100644
14--- a/common/macroblock.c
15+++ b/common/macroblock.c
16@@ -341,7 +341,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
17         int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
18         int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
19         int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
20-            ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
21+            ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
22         scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
23     }
24     int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+3)&~3) * sizeof(int);
25diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
26index 78ca4c7..1756f86 100644
27--- a/common/x86/pixel-a.asm
28+++ b/common/x86/pixel-a.asm
29@@ -2142,34 +2142,24 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
30 ; Successive Elimination ADS
31 ;=============================================================================
32 
33-%macro ADS_START 1 ; unroll_size
34-%ifdef ARCH_X86_64
35-    %define t0 r6
36+%macro ADS_START 0
37 %ifdef WIN64
38-    mov     r4,  r4mp
39-    movsxd  r5,  dword r5m
40+    movsxd  r5,  r5d
41 %endif
42-    mov     r10, rsp
43-%else
44-    %define t0 r4
45-    mov     rbp, rsp
46-%endif
47-    mov     r0d, r5m
48-    sub     rsp, r0
49-    sub     rsp, %1*4-1
50-    and     rsp, ~15
51-    mov     t0,  rsp
52+    mov     r0d, r5d
53+    lea     r6,  [r4+r5+15]
54+    and     r6,  ~15;
55     shl     r2d,  1
56 %endmacro
57 
58-%macro ADS_END 1
59+%macro ADS_END 1 ; unroll_size
60     add     r1, 8*%1
61     add     r3, 8*%1
62-    add     t0, 4*%1
63+    add     r6, 4*%1
64     sub     r0d, 4*%1
65     jg .loop
66 %ifdef WIN64
67-    RESTORE_XMM r10
68+    RESTORE_XMM rsp
69 %endif
70     jmp ads_mvs
71 %endmacro
72@@ -2180,14 +2170,14 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
73 ; int pixel_ads4( int enc_dc[4], uint16_t *sums, int delta,
74 ;                 uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
75 ;-----------------------------------------------------------------------------
76-cglobal pixel_ads4_mmxext, 4,7
77+cglobal pixel_ads4_mmxext, 6,7
78     movq    mm6, [r0]
79     movq    mm4, [r0+8]
80     pshufw  mm7, mm6, 0
81     pshufw  mm6, mm6, 0xAA
82     pshufw  mm5, mm4, 0
83     pshufw  mm4, mm4, 0xAA
84-    ADS_START 1
85+    ADS_START
86 .loop:
87     movq    mm0, [r1]
88     movq    mm1, [r1+16]
89@@ -2204,25 +2194,19 @@ cglobal pixel_ads4_mmxext, 4,7
90     ABS1    mm3, mm1
91     paddw   mm0, mm2
92     paddw   mm0, mm3
93-%ifdef WIN64
94-    pshufw  mm1, [r10+stack_offset+56], 0
95-%elifdef ARCH_X86_64
96-    pshufw  mm1, [r10+8], 0
97-%else
98-    pshufw  mm1, [ebp+stack_offset+28], 0
99-%endif
100+    pshufw  mm1, r6m, 0
101     paddusw mm0, [r3]
102     psubusw mm1, mm0
103     packsswb mm1, mm1
104-    movd    [t0], mm1
105+    movd    [r6], mm1
106     ADS_END 1
107 
108-cglobal pixel_ads2_mmxext, 4,7
109+cglobal pixel_ads2_mmxext, 6,7
110     movq    mm6, [r0]
111     pshufw  mm5, r6m, 0
112     pshufw  mm7, mm6, 0
113     pshufw  mm6, mm6, 0xAA
114-    ADS_START 1
115+    ADS_START
116 .loop:
117     movq    mm0, [r1]
118     movq    mm1, [r1+r2]
119@@ -2235,13 +2219,13 @@ cglobal pixel_ads2_mmxext, 4,7
120     movq    mm4, mm5
121     psubusw mm4, mm0
122     packsswb mm4, mm4
123-    movd    [t0], mm4
124+    movd    [r6], mm4
125     ADS_END 1
126 
127-cglobal pixel_ads1_mmxext, 4,7
128+cglobal pixel_ads1_mmxext, 6,7
129     pshufw  mm7, [r0], 0
130     pshufw  mm6, r6m, 0
131-    ADS_START 2
132+    ADS_START
133 .loop:
134     movq    mm0, [r1]
135     movq    mm1, [r1+8]
136@@ -2256,11 +2240,11 @@ cglobal pixel_ads1_mmxext, 4,7
137     psubusw mm4, mm0
138     psubusw mm5, mm1
139     packsswb mm4, mm5
140-    movq    [t0], mm4
141+    movq    [r6], mm4
142     ADS_END 2
143 
144 %macro ADS_SSE2 1
145-cglobal pixel_ads4_%1, 4,7,12
146+cglobal pixel_ads4_%1, 6,7,12
147     movdqa  xmm4, [r0]
148     pshuflw xmm7, xmm4, 0
149     pshuflw xmm6, xmm4, 0xAA
150@@ -2273,7 +2257,7 @@ cglobal pixel_ads4_%1, 4,7,12
151 %ifdef ARCH_X86_64
152     pshuflw xmm8, r6m, 0
153     punpcklqdq xmm8, xmm8
154-    ADS_START 2
155+    ADS_START
156     movdqu  xmm10, [r1]
157     movdqu  xmm11, [r1+r2]
158 .loop:
159@@ -2299,9 +2283,9 @@ cglobal pixel_ads4_%1, 4,7,12
160     movdqa  xmm1, xmm8
161     psubusw xmm1, xmm0
162     packsswb xmm1, xmm1
163-    movq    [t0], xmm1
164+    movq    [r6], xmm1
165 %else
166-    ADS_START 2
167+    ADS_START
168 .loop:
169     movdqu  xmm0, [r1]
170     movdqu  xmm1, [r1+16]
171@@ -2318,18 +2302,18 @@ cglobal pixel_ads4_%1, 4,7,12
172     ABS1    xmm3, xmm1
173     paddw   xmm0, xmm2
174     paddw   xmm0, xmm3
175-    movd    xmm1, [ebp+stack_offset+28]
176+    movd    xmm1, r6m
177     movdqu  xmm2, [r3]
178     pshuflw xmm1, xmm1, 0
179     punpcklqdq xmm1, xmm1
180     paddusw xmm0, xmm2
181     psubusw xmm1, xmm0
182     packsswb xmm1, xmm1
183-    movq    [t0], xmm1
184+    movq    [r6], xmm1
185 %endif ; ARCH
186     ADS_END 2
187 
188-cglobal pixel_ads2_%1, 4,7,8
189+cglobal pixel_ads2_%1, 6,7,8
190     movq    xmm6, [r0]
191     movd    xmm5, r6m
192     pshuflw xmm7, xmm6, 0
193@@ -2338,7 +2322,7 @@ cglobal pixel_ads2_%1, 4,7,8
194     punpcklqdq xmm7, xmm7
195     punpcklqdq xmm6, xmm6
196     punpcklqdq xmm5, xmm5
197-    ADS_START 2
198+    ADS_START
199 .loop:
200     movdqu  xmm0, [r1]
201     movdqu  xmm1, [r1+r2]
202@@ -2352,17 +2336,17 @@ cglobal pixel_ads2_%1, 4,7,8
203     movdqa  xmm1, xmm5
204     psubusw xmm1, xmm0
205     packsswb xmm1, xmm1
206-    movq    [t0], xmm1
207+    movq    [r6], xmm1
208     ADS_END 2
209 
210-cglobal pixel_ads1_%1, 4,7,8
211+cglobal pixel_ads1_%1, 6,7,8
212     movd    xmm7, [r0]
213     movd    xmm6, r6m
214     pshuflw xmm7, xmm7, 0
215     pshuflw xmm6, xmm6, 0
216     punpcklqdq xmm7, xmm7
217     punpcklqdq xmm6, xmm6
218-    ADS_START 4
219+    ADS_START
220 .loop:
221     movdqu  xmm0, [r1]
222     movdqu  xmm1, [r1+16]
223@@ -2379,7 +2363,7 @@ cglobal pixel_ads1_%1, 4,7,8
224     psubusw xmm4, xmm0
225     psubusw xmm5, xmm1
226     packsswb xmm4, xmm5
227-    movdqa  [t0], xmm4
228+    movdqa  [r6], xmm4
229     ADS_END 4
230 %endmacro
231 
232@@ -2401,90 +2385,57 @@ ADS_SSE2 ssse3
233 ;     }
234 ;     return nmv;
235 ; }
236+
237+%macro TEST 1
238+    mov     [r4+r0*2], r1w
239+    test    r2d, 0xff<<(%1*8)
240+    setne   r3b
241+    add     r0d, r3d
242+    inc     r1d
243+%endmacro
244+
245 cglobal pixel_ads_mvs, 0,7,0
246 ads_mvs:
247-%ifdef ARCH_X86_64
248+    lea     r6,  [r4+r5+15]
249+    and     r6,  ~15;
250     ; mvs = r4
251-    ; masks = rsp
252+    ; masks = r6
253     ; width = r5
254     ; clear last block in case width isn't divisible by 8. (assume divisible by 4, so clearing 4 bytes is enough.)
255-%ifdef WIN64
256-    mov     r8, r4
257-    mov     r9, r5
258-%endif
259-    xor     eax, eax
260-    xor     esi, esi
261-    mov     dword [rsp+r9], 0
262+    xor     r0d, r0d
263+    xor     r1d, r1d
264+    mov     [r6+r5], r0d
265     jmp .loopi
266+ALIGN 16
267 .loopi0:
268-    add     esi, 8
269-    cmp     esi, r9d
270+    add     r1d, 8
271+    cmp     r1d, r5d
272     jge .end
273 .loopi:
274-    mov     rdi, [rsp+rsi]
275-    test    rdi, rdi
276+    mov     r2,  [r6+r1]
277+%ifdef ARCH_X86_64
278+    test    r2,  r2
279+%else
280+    mov     r3,  r2
281+    or      r3d, [r6+r1+4]
282+%endif
283     jz .loopi0
284-    xor     ecx, ecx
285-%macro TEST 1
286-    mov     [r8+rax*2], si
287-    test    edi, 0xff<<(%1*8)
288-    setne   cl
289-    add     eax, ecx
290-    inc     esi
291-%endmacro
292+    xor     r3d, r3d
293     TEST 0
294     TEST 1
295     TEST 2
296     TEST 3
297-    shr     rdi, 32
298+%ifdef ARCH_X86_64
299+    shr     r2,  32
300+%else
301+    mov     r2d, [r6+r1]
302+%endif
303     TEST 0
304     TEST 1
305     TEST 2
306     TEST 3
307-    cmp     esi, r9d
308-    jl .loopi
309-.end:
310-    mov     rsp, r10
311-    RET
312-
313-%else
314-    xor     eax, eax
315-    xor     esi, esi
316-    mov     ebx, [ebp+stack_offset+20] ; mvs
317-    mov     edi, [ebp+stack_offset+24] ; width
318-    mov     dword [esp+edi], 0
319-    push    ebp
320-    jmp .loopi
321-.loopi0:
322-    add     esi, 8
323-    cmp     esi, edi
324-    jge .end
325-.loopi:
326-    mov     ebp, [esp+esi+4]
327-    mov     edx, [esp+esi+8]
328-    mov     ecx, ebp
329-    or      ecx, edx
330-    jz .loopi0
331-    xor     ecx, ecx
332-%macro TEST 2
333-    mov     [ebx+eax*2], si
334-    test    %2, 0xff<<(%1*8)
335-    setne   cl
336-    add     eax, ecx
337-    inc     esi
338-%endmacro
339-    TEST 0, ebp
340-    TEST 1, ebp
341-    TEST 2, ebp
342-    TEST 3, ebp
343-    TEST 0, edx
344-    TEST 1, edx
345-    TEST 2, edx
346-    TEST 3, edx
347-    cmp     esi, edi
348+    cmp     r1d, r5d
349     jl .loopi
350 .end:
351-    pop     esp
352+    movifnidn eax, r0d
353     RET
354-%endif ; ARCH
355-
356diff --git a/encoder/me.c b/encoder/me.c
357index 291104a..19c5b2b 100644
358--- a/encoder/me.c
359+++ b/encoder/me.c
360@@ -609,7 +609,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
361             if( h->mb.i_me_method == X264_ME_TESA )
362             {
363                 // ADS threshold, then SAD threshold, then keep the best few SADs, then SATD
364-                mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15));
365+                mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15) + 4);
366                 int nmvsad = 0, limit;
367                 int sad_thresh = i_me_range <= 16 ? 10 : i_me_range <= 24 ? 11 : 12;
368                 int bsad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref_w+bmy*stride+bmx, stride )
369-- 
3701.7.1
371
372
373From 4f74306c2f266bfc671ad99e9027b816dd423ece Mon Sep 17 00:00:00 2001
374From: Jason Garrett-Glaser <darkshikari@gmail.com>
375Date: Mon, 28 Jun 2010 15:02:33 -0700
376Subject: [PATCH 2/7] Callback feature for low-latency per-slice output
377 Add a callback to allow the calling application to send slices immediately after being encoded.
378 Also add some extra information to the x264_nal_t structure to help inform such a calling application how the NAL units should be ordered.
379
380Full documentation is in x264.h.
381---
382 common/bitstream.c |    7 ++-
383 common/bitstream.h |    1 -
384 encoder/encoder.c  |   26 ++++++++---
385 x264.h             |  128 +++++++++++++++++++++++++++++++++-------------------
386 4 files changed, 105 insertions(+), 57 deletions(-)
387
388diff --git a/common/bitstream.c b/common/bitstream.c
389index 0aaac21..ad8c16e 100644
390--- a/common/bitstream.c
391+++ b/common/bitstream.c
392@@ -44,7 +44,7 @@ uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
393 /****************************************************************************
394  * x264_nal_encode:
395  ****************************************************************************/
396-int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode )
397+void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
398 {
399     uint8_t *src = nal->p_payload;
400     uint8_t *end = nal->p_payload + nal->i_payload;
401@@ -52,7 +52,7 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
402 
403     if( h->param.b_annexb )
404     {
405-        if( b_long_startcode )
406+        if( nal->b_long_startcode )
407             *dst++ = 0x00;
408         *dst++ = 0x00;
409         *dst++ = 0x00;
410@@ -77,7 +77,8 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
411         orig_dst[3] = size>> 0;
412     }
413 
414-    return size+4;
415+    nal->i_payload = size+4;
416+    nal->p_payload = orig_dst;
417 }
418 
419 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
420diff --git a/common/bitstream.h b/common/bitstream.h
421index 9ce5bd7..dd8118d 100644
422--- a/common/bitstream.h
423+++ b/common/bitstream.h
424@@ -68,7 +68,6 @@ typedef struct
425     uint8_t *(*nal_escape) ( uint8_t *dst, uint8_t *src, uint8_t *end );
426 } x264_bitstream_function_t;
427 
428-int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode );
429 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf );
430 
431 /* A larger level table size theoretically could help a bit at extremely
432diff --git a/encoder/encoder.c b/encoder/encoder.c
433index f54fe85..fe97aef 100644
434--- a/encoder/encoder.c
435+++ b/encoder/encoder.c
436@@ -427,6 +427,8 @@ static int x264_validate_parameters( x264_t *h )
437     else
438         h->param.b_sliced_threads = 0;
439     h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;
440+    if( h->i_thread_frames > 1 )
441+        h->param.nalu_process = NULL;
442 
443     if( h->param.b_interlaced )
444     {
445@@ -1253,8 +1255,9 @@ static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc )
446 {
447     x264_nal_t *nal = &h->out.nal[h->out.i_nal];
448 
449-    nal->i_ref_idc = i_ref_idc;
450-    nal->i_type    = i_type;
451+    nal->i_ref_idc        = i_ref_idc;
452+    nal->i_type           = i_type;
453+    nal->b_long_startcode = 1;
454 
455     nal->i_payload= 0;
456     nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
457@@ -1280,6 +1283,8 @@ static int x264_nal_end( x264_t *h )
458 {
459     x264_nal_t *nal = &h->out.nal[h->out.i_nal];
460     nal->i_payload = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8] - nal->p_payload;
461+    if( h->param.nalu_process )
462+        h->param.nalu_process( h, nal );
463     h->out.i_nal++;
464 
465     return x264_nal_check_buffer( h );
466@@ -1289,6 +1294,13 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
467 {
468     int nal_size = 0, previous_nal_size = 0;
469 
470+    if( h->param.nalu_process )
471+    {
472+        for( int i = start; i < h->out.i_nal; i++ )
473+            nal_size += h->out.nal[i].i_payload;
474+        return nal_size;
475+    }
476+
477     for( int i = 0; i < start; i++ )
478         previous_nal_size += h->out.nal[i].i_payload;
479 
480@@ -1311,11 +1323,9 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
481 
482     for( int i = start; i < h->out.i_nal; i++ )
483     {
484-        int long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
485-        int size = x264_nal_encode( h, nal_buffer, &h->out.nal[i], long_startcode );
486-        h->out.nal[i].i_payload = size;
487-        h->out.nal[i].p_payload = nal_buffer;
488-        nal_buffer += size;
489+        h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
490+        x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
491+        nal_buffer += h->out.nal[i].i_payload;
492     }
493 
494     x264_emms();
495@@ -1805,6 +1815,7 @@ static int x264_slice_write( x264_t *h )
496 
497     /* Slice */
498     x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
499+    h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;
500 
501     /* Slice header */
502     x264_macroblock_thread_init( h );
503@@ -2020,6 +2031,7 @@ static int x264_slice_write( x264_t *h )
504             i_mb_x = 0;
505         }
506     }
507+    h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
508 
509     if( h->param.b_cabac )
510     {
511diff --git a/x264.h b/x264.h
512index 1138a8b..e1ae084 100644
513--- a/x264.h
514+++ b/x264.h
515@@ -35,13 +35,61 @@
516 
517 #include <stdarg.h>
518 
519-#define X264_BUILD 100
520+#define X264_BUILD 101
521 
522 /* x264_t:
523  *      opaque handler for encoder */
524 typedef struct x264_t x264_t;
525 
526 /****************************************************************************
527+ * NAL structure and functions
528+ ****************************************************************************/
529+
530+enum nal_unit_type_e
531+{
532+    NAL_UNKNOWN     = 0,
533+    NAL_SLICE       = 1,
534+    NAL_SLICE_DPA   = 2,
535+    NAL_SLICE_DPB   = 3,
536+    NAL_SLICE_DPC   = 4,
537+    NAL_SLICE_IDR   = 5,    /* ref_idc != 0 */
538+    NAL_SEI         = 6,    /* ref_idc == 0 */
539+    NAL_SPS         = 7,
540+    NAL_PPS         = 8,
541+    NAL_AUD         = 9,
542+    NAL_FILLER      = 12,
543+    /* ref_idc == 0 for 6,9,10,11,12 */
544+};
545+enum nal_priority_e
546+{
547+    NAL_PRIORITY_DISPOSABLE = 0,
548+    NAL_PRIORITY_LOW        = 1,
549+    NAL_PRIORITY_HIGH       = 2,
550+    NAL_PRIORITY_HIGHEST    = 3,
551+};
552+
553+/* The data within the payload is already NAL-encapsulated; the ref_idc and type
554+ * are merely in the struct for easy access by the calling application.
555+ * All data returned in an x264_nal_t, including the data in p_payload, is no longer
556+ * valid after the next call to x264_encoder_encode.  Thus it must be used or copied
557+ * before calling x264_encoder_encode or x264_encoder_headers again. */
558+typedef struct
559+{
560+    int i_ref_idc;  /* nal_priority_e */
561+    int i_type;     /* nal_unit_type_e */
562+    int b_long_startcode;
563+    int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
564+    int i_last_mb;  /* If this NAL is a slice, the index of the last MB in the slice. */
565+
566+    /* Size of payload in bytes. */
567+    int     i_payload;
568+    /* If param->b_annexb is set, Annex-B bytestream with startcode.
569+     * Otherwise, startcode is replaced with a 4-byte size.
570+     * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
571+    uint8_t *p_payload;
572+} x264_nal_t;
573+
574+/****************************************************************************
575  * Encoder parameters
576  ****************************************************************************/
577 /* CPU flags
578@@ -377,8 +425,41 @@ typedef struct x264_param_t
579      * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
580      * Not used when x264_encoder_reconfig is called directly. */
581     void (*param_free)( void* );
582+
583+    /* Optional low-level callback for low-latency encoding.  Called for each output NAL unit
584+     * immediately after the NAL unit is finished encoding.  This allows the calling application
585+     * to begin processing video data (e.g. by sending packets over a network) before the frame
586+     * is done encoding.
587+     *
588+     * This callback MUST do the following in order to work correctly:
589+     * 1) Have available an output buffer of at least size nal->i_payload*3/2 + 5 + 16.
590+     * 2) Call x264_nal_encode( h, dst, nal ), where dst is the output buffer.
591+     * After these steps, the content of nal is valid and can be used in the same way as if
592+     * the NAL unit were output by x264_encoder_encode.
593+     *
594+     * This does not need to be synchronous with the encoding process: the data pointed to
595+     * by nal (both before and after x264_nal_encode) will remain valid until the next
596+     * x264_encoder_encode call.  The callback must be re-entrant.
597+     *
598+     * This callback does not work with frame-based threads; threads must be disabled
599+     * or sliced-threads enabled.  This callback also does not work as one would expect
600+     * with HRD -- since the buffering period SEI cannot be calculated until the frame
601+     * is finished encoding, it will not be sent via this callback.
602+     *
603+     * Note also that the NALs are not necessarily returned in order when sliced threads is
604+     * enabled.  Accordingly, the variables i_first_mb and i_last_mb are available in
605+     * x264_nal_t to help the calling application reorder the slices if necessary.
606+     *
607+     * When this callback is enabled, x264_encoder_encode does not return valid NALs;
608+     * the calling application is expected to acquire all output NALs through the callback.
609+     *
610+     * It is generally sensible to combine this callback with a use of slice-max-mbs or
611+     * slice-max-size. */
612+    void (*nalu_process) ( x264_t *h, x264_nal_t *nal );
613 } x264_param_t;
614 
615+void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
616+
617 /****************************************************************************
618  * H.264 level restriction information
619  ****************************************************************************/
620@@ -586,51 +667,6 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
621 void x264_picture_clean( x264_picture_t *pic );
622 
623 /****************************************************************************
624- * NAL structure and functions
625- ****************************************************************************/
626-
627-enum nal_unit_type_e
628-{
629-    NAL_UNKNOWN     = 0,
630-    NAL_SLICE       = 1,
631-    NAL_SLICE_DPA   = 2,
632-    NAL_SLICE_DPB   = 3,
633-    NAL_SLICE_DPC   = 4,
634-    NAL_SLICE_IDR   = 5,    /* ref_idc != 0 */
635-    NAL_SEI         = 6,    /* ref_idc == 0 */
636-    NAL_SPS         = 7,
637-    NAL_PPS         = 8,
638-    NAL_AUD         = 9,
639-    NAL_FILLER      = 12,
640-    /* ref_idc == 0 for 6,9,10,11,12 */
641-};
642-enum nal_priority_e
643-{
644-    NAL_PRIORITY_DISPOSABLE = 0,
645-    NAL_PRIORITY_LOW        = 1,
646-    NAL_PRIORITY_HIGH       = 2,
647-    NAL_PRIORITY_HIGHEST    = 3,
648-};
649-
650-/* The data within the payload is already NAL-encapsulated; the ref_idc and type
651- * are merely in the struct for easy access by the calling application.
652- * All data returned in an x264_nal_t, including the data in p_payload, is no longer
653- * valid after the next call to x264_encoder_encode.  Thus it must be used or copied
654- * before calling x264_encoder_encode or x264_encoder_headers again. */
655-typedef struct
656-{
657-    int i_ref_idc;  /* nal_priority_e */
658-    int i_type;     /* nal_unit_type_e */
659-
660-    /* Size of payload in bytes. */
661-    int     i_payload;
662-    /* If param->b_annexb is set, Annex-B bytestream with 4-byte startcode.
663-     * Otherwise, startcode is replaced with a 4-byte size.
664-     * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
665-    uint8_t *p_payload;
666-} x264_nal_t;
667-
668-/****************************************************************************
669  * Encoder functions
670  ****************************************************************************/
671 
672-- 
6731.7.1
674
675
676From 22bf1672adafa4e938a13952b8f71cd7548d31f1 Mon Sep 17 00:00:00 2001
677From: Lamont Alston <wewk584@gmail.com>
678Date: Tue, 29 Jun 2010 10:11:42 -0700
679Subject: [PATCH 3/7] Make open-GOP Blu-ray compatible
680 Blu-ray is even more braindamaged than we thought.
681 Accordingly, open-gop options are now "normal" and "bluray", as opposed to display and coded.
682 Normal should be used in all cases besides Blu-ray authoring.
683
684---
685 encoder/encoder.c   |    2 +-
686 encoder/slicetype.c |   28 +++++++---------------------
687 x264.c              |    8 ++++----
688 x264.h              |    8 ++++----
689 4 files changed, 16 insertions(+), 30 deletions(-)
690
691diff --git a/encoder/encoder.c b/encoder/encoder.c
692index fe97aef..5cd3307 100644
693--- a/encoder/encoder.c
694+++ b/encoder/encoder.c
695@@ -577,7 +577,7 @@ static int x264_validate_parameters( x264_t *h )
696         h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
697     }
698     h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
699-    h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER );
700+    h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
701     if( h->param.i_keyint_max == 1 )
702         h->param.b_intra_refresh = 0;
703     h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
704diff --git a/encoder/slicetype.c b/encoder/slicetype.c
705index 2703f02..4ede8cf 100644
706--- a/encoder/slicetype.c
707+++ b/encoder/slicetype.c
708@@ -1233,17 +1233,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
709     if( !h->param.b_intra_refresh )
710         for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
711         {
712-            int j = i;
713-            if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
714-            {
715-                while( IS_X264_TYPE_B( frames[i]->i_type ) )
716-                    i++;
717-                while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
718-                    j--;
719-            }
720             frames[i]->i_type = X264_TYPE_I;
721             reset_start = X264_MIN( reset_start, i+1 );
722-            i = j;
723+            if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
724+                while( IS_X264_TYPE_B( frames[i-1]->i_type ) )
725+                    i--;
726         }
727 
728     if( vbv_lookahead )
729@@ -1337,16 +1331,8 @@ void x264_slicetype_decide( x264_t *h )
730             if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
731                 frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
732             int warn = frm->i_type != X264_TYPE_IDR;
733-            if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER )
734-                warn &= frm->i_type != X264_TYPE_I && frm->i_type != X264_TYPE_KEYFRAME;
735-            if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
736-            {
737-                /* if this minigop ends with i, it's not a violation */
738-                int j = bframes;
739-                while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
740-                    j++;
741-                warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
742-            }
743+            if( warn && h->param.i_open_gop )
744+                warn &= frm->i_type != X264_TYPE_I;
745             if( warn )
746                 x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
747         }
748@@ -1355,8 +1341,8 @@ void x264_slicetype_decide( x264_t *h )
749             if( h->param.i_open_gop )
750             {
751                 h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
752-                if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
753-                    h->lookahead->i_last_keyframe -= bframes; // Use coded order
754+                if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
755+                    h->lookahead->i_last_keyframe -= bframes; // Use bluray order
756                 frm->b_keyframe = 1;
757             }
758             else
759diff --git a/x264.c b/x264.c
760index df04385..f08ab41 100644
761--- a/x264.c
762+++ b/x264.c
763@@ -382,10 +382,10 @@ static void Help( x264_param_t *defaults, int longhelp )
764         "                                  - normal: Non-strict (not Blu-ray compatible)\n",
765         strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
766     H1( "      --open-gop <string>     Use recovery points to close GOPs [none]\n"
767-        "                                  - none: Use standard closed GOPs\n"
768-        "                                  - display: Base GOP length on display order\n"
769-        "                                             (not Blu-ray compatible)\n"
770-        "                                  - coded: Base GOP length on coded order\n"
771+        "                                  - none: closed GOPs only\n"
772+        "                                  - normal: standard open GOPs\n"
773+        "                                            (not Blu-ray compatible)\n"
774+        "                                  - bluray: Blu-ray-compatible open GOPs\n"
775         "                              Only available with b-frames\n" );
776     H1( "      --no-cabac              Disable CABAC\n" );
777     H1( "  -r, --ref <integer>         Number of reference frames [%d]\n", defaults->i_frame_reference );
778diff --git a/x264.h b/x264.h
779index e1ae084..86f7426 100644
780--- a/x264.h
781+++ b/x264.h
782@@ -153,8 +153,8 @@ typedef struct
783 #define X264_B_PYRAMID_NORMAL        2
784 #define X264_KEYINT_MIN_AUTO         0
785 #define X264_OPEN_GOP_NONE           0
786-#define X264_OPEN_GOP_DISPLAY_ORDER  1
787-#define X264_OPEN_GOP_CODED_ORDER    2
788+#define X264_OPEN_GOP_NORMAL         1
789+#define X264_OPEN_GOP_BLURAY         2
790 
791 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
792 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
793@@ -166,7 +166,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "
794 static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
795 static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
796 static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
797-static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 };
798+static const char * const x264_open_gop_names[] = { "none", "normal", "bluray", 0 };
799 
800 /* Colorspace type
801  * legacy only; nothing other than I420 is really supported. */
802@@ -276,7 +276,7 @@ typedef struct x264_param_t
803     int         i_bframe_adaptive;
804     int         i_bframe_bias;
805     int         i_bframe_pyramid;   /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
806-    int         i_open_gop;         /* Open gop: 1=display order, 2=coded order to determine gop size */
807+    int         i_open_gop;         /* Open gop: 1=normal (display order), 2=bluray compatibility braindamage mode */
808 
809     int         b_deblocking_filter;
810     int         i_deblocking_filter_alphac0;    /* [-6, 6] -6 light filter, 6 strong */
811-- 
8121.7.1
813
814
815From ce052b8bae0bed131716d3ed655b974a1a2ebcb5 Mon Sep 17 00:00:00 2001
816From: Steven Walters <kemuri9@gmail.com>
817Date: Sat, 26 Jun 2010 16:28:49 -0400
818Subject: [PATCH 4/7] Centralize logging within x264cli
819 x264cli messages will now respect the log level they pertain to.
820 Slightly reduces binary size.
821
822---
823 input/avs.c             |   88 +++++-------------
824 input/ffms.c            |   58 +++--------
825 input/input.h           |    2 +
826 input/lavf.c            |   55 +++--------
827 input/thread.c          |    9 +-
828 input/timecode.c        |  111 ++++++----------------
829 input/y4m.c             |   23 +----
830 input/yuv.c             |    8 +-
831 muxers.h                |   61 ------------
832 output/flv.c            |   10 +-
833 output/flv_bytestream.c |    2 +-
834 output/matroska.c       |    2 +-
835 output/matroska_ebml.c  |    2 +-
836 output/mp4.c            |   12 +--
837 output/output.h         |    2 +
838 output/raw.c            |    2 +-
839 x264.c                  |  246 +++++++++++++++++++++--------------------------
840 x264cli.h               |   67 +++++++++++++
841 18 files changed, 289 insertions(+), 471 deletions(-)
842 delete mode 100644 muxers.h
843 create mode 100644 x264cli.h
844
845diff --git a/input/avs.c b/input/avs.c
846index 07add40..b83f715 100644
847--- a/input/avs.c
848+++ b/input/avs.c
849@@ -20,8 +20,9 @@
850  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
851  *****************************************************************************/
852 
853-#include "muxers.h"
854+#include "input.h"
855 #include <windows.h>
856+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ )
857 
858 /* the AVS interface currently uses __declspec to link function declarations to their definitions in the dll.
859    this has a side effect of preventing program execution if the avisynth dll is not found,
860@@ -131,27 +132,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
861     FILE *fh = fopen( psz_filename, "r" );
862     if( !fh )
863         return -1;
864-    else if( !x264_is_regular_file( fh ) )
865-    {
866-        fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
867-        return -1;
868-    }
869+    FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
870     fclose( fh );
871 
872     avs_hnd_t *h = malloc( sizeof(avs_hnd_t) );
873     if( !h )
874         return -1;
875-    if( avs_load_library( h ) )
876-    {
877-        fprintf( stderr, "avs [error]: failed to load avisynth\n" );
878-        return -1;
879-    }
880+    FAIL_IF_ERROR( avs_load_library( h ), "failed to load avisynth\n" )
881     h->env = h->func.avs_create_script_environment( AVS_INTERFACE_YV12 );
882-    if( !h->env )
883-    {
884-        fprintf( stderr, "avs [error]: failed to initiate avisynth\n" );
885-        return -1;
886-    }
887+    FAIL_IF_ERROR( !h->env, "failed to initiate avisynth\n" )
888     AVS_Value arg = avs_new_value_string( psz_filename );
889     AVS_Value res;
890     char *filename_ext = get_filename_extension( psz_filename );
891@@ -159,11 +148,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
892     if( !strcasecmp( filename_ext, "avs" ) )
893     {
894         res = h->func.avs_invoke( h->env, "Import", arg, NULL );
895-        if( avs_is_error( res ) )
896-        {
897-            fprintf( stderr, "avs [error]: %s\n", avs_as_string( res ) );
898-            return -1;
899-        }
900+        FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) )
901         /* check if the user is using a multi-threaded script and apply distributor if necessary.
902            adapted from avisynth's vfw interface */
903         AVS_Value mt_test = h->func.avs_invoke( h->env, "GetMTMode", avs_new_value_bool( 0 ), NULL );
904@@ -184,78 +169,55 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
905         int i;
906         for( i = 0; filter[i]; i++ )
907         {
908-            fprintf( stderr, "avs [info]: trying %s... ", filter[i] );
909+            x264_cli_log( "avs", X264_LOG_INFO, "trying %s... ", filter[i] );
910             if( !h->func.avs_function_exists( h->env, filter[i] ) )
911             {
912-                fprintf( stderr, "not found\n" );
913+                x264_cli_printf( X264_LOG_INFO, "not found\n" );
914                 continue;
915             }
916             if( !strncasecmp( filter[i], "FFmpegSource", 12 ) )
917             {
918-                fprintf( stderr, "indexing... " );
919+                x264_cli_printf( X264_LOG_INFO, "indexing... " );
920                 fflush( stderr );
921             }
922             res = h->func.avs_invoke( h->env, filter[i], arg, NULL );
923             if( !avs_is_error( res ) )
924             {
925-                fprintf( stderr, "succeeded\n" );
926+                x264_cli_printf( X264_LOG_INFO, "succeeded\n" );
927                 break;
928             }
929-            fprintf( stderr, "failed\n" );
930-        }
931-        if( !filter[i] )
932-        {
933-            fprintf( stderr, "avs [error]: unable to find source filter to open `%s'\n", psz_filename );
934-            return -1;
935+            x264_cli_printf( X264_LOG_INFO, "failed\n" );
936         }
937+        FAIL_IF_ERROR( !filter[i], "unable to find source filter to open `%s'\n", psz_filename )
938     }
939-    if( !avs_is_clip( res ) )
940-    {
941-        fprintf( stderr, "avs [error]: `%s' didn't return a video clip\n", psz_filename );
942-        return -1;
943-    }
944+    FAIL_IF_ERROR( !avs_is_clip( res ), "`%s' didn't return a video clip\n", psz_filename )
945     h->clip = h->func.avs_take_clip( res, h->env );
946     int avs_version = h->func.avs_get_version( h->clip );
947     const AVS_VideoInfo *vi = h->func.avs_get_video_info( h->clip );
948-    if( !avs_has_video( vi ) )
949-    {
950-        fprintf( stderr, "avs [error]: `%s' has no video data\n", psz_filename );
951-        return -1;
952-    }
953+    FAIL_IF_ERROR( !avs_has_video( vi ), "`%s' has no video data\n", psz_filename )
954     /* if the clip is made of fields instead of frames, call weave to make them frames */
955     if( avs_is_field_based( vi ) )
956     {
957-        fprintf( stderr, "avs [warning]: detected fieldbased (separated) input, weaving to frames\n" );
958+        x264_cli_log( "avs", X264_LOG_WARNING, "detected fieldbased (separated) input, weaving to frames\n" );
959         AVS_Value tmp = h->func.avs_invoke( h->env, "Weave", res, NULL );
960-        if( avs_is_error( tmp ) )
961-        {
962-            fprintf( stderr, "avs [error]: couldn't weave fields into frames\n" );
963-            return -1;
964-        }
965+        FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" )
966         res = update_clip( h, &vi, tmp, res );
967         info->interlaced = 1;
968         info->tff = avs_is_tff( vi );
969     }
970-    if( vi->width&1 || vi->height&1 )
971-    {
972-        fprintf( stderr, "avs [error]: input clip width or height not divisible by 2 (%dx%d)\n",
973-                 vi->width, vi->height );
974-        return -1;
975-    }
976+    FAIL_IF_ERROR( vi->width&1 || vi->height&1, "input clip width or height not divisible by 2 (%dx%d)\n", vi->width, vi->height )
977     /* always call ConvertToYV12 to convert non YV12 planar colorspaces to YV12 when user's AVS supports them,
978        as all planar colorspaces are flagged as YV12. If it is already YV12 in this case, the call does nothing */
979     if( !avs_is_yv12( vi ) || avs_version >= AVS_INTERFACE_OTHER_PLANAR )
980     {
981-        fprintf( stderr, "avs %s\n", !avs_is_yv12( vi ) ? "[warning]: converting input clip to YV12"
982-               : "[info]: avisynth 2.6+ detected, forcing conversion to YV12" );
983+        if( !avs_is_yv12( vi ) )
984+            x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to YV12\n" );
985+        else
986+            x264_cli_log( "avs", X264_LOG_INFO, "avisynth 2.6+ detected, forcing conversion to YV12\n" );
987         const char *arg_name[2] = { NULL, "interlaced" };
988         AVS_Value arg_arr[2] = { res, avs_new_value_bool( info->interlaced ) };
989         AVS_Value res2 = h->func.avs_invoke( h->env, "ConvertToYV12", avs_new_value_array( arg_arr, 2 ), arg_name );
990-        if( avs_is_error( res2 ) )
991-        {
992-            fprintf( stderr, "avs [error]: couldn't convert input clip to YV12\n" );
993-            return -1;
994-        }
995+        FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to YV12\n" )
996         res = update_clip( h, &vi, res2, res );
997     }
998     h->func.avs_release_value( res );
999@@ -294,11 +256,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1000         return -1;
1001     AVS_VideoFrame *frm = p_pic->opaque = h->func.avs_get_frame( h->clip, i_frame );
1002     const char *err = h->func.avs_clip_get_error( h->clip );
1003-    if( err )
1004-    {
1005-        fprintf( stderr, "avs [error]: %s occurred while reading frame %d\n", err, i_frame );
1006-        return -1;
1007-    }
1008+    FAIL_IF_ERROR( err, "%s occurred while reading frame %d\n", err, i_frame )
1009     for( int i = 0; i < 3; i++ )
1010     {
1011         /* explicitly cast away the const attribute to avoid a warning */
1012diff --git a/input/ffms.c b/input/ffms.c
1013index b2a253e..fe8bf7e 100644
1014--- a/input/ffms.c
1015+++ b/input/ffms.c
1016@@ -21,8 +21,10 @@
1017  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1018  *****************************************************************************/
1019 
1020-#include "muxers.h"
1021+#include "input.h"
1022 #include <ffms.h>
1023+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ )
1024+
1025 #undef DECLARE_ALIGNED
1026 #include <libavcodec/avcodec.h>
1027 #include <libswscale/swscale.h>
1028@@ -86,28 +88,16 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1029     {
1030         idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, NULL, &e );
1031         fprintf( stderr, "                                            \r" );
1032-        if( !idx )
1033-        {
1034-            fprintf( stderr, "ffms [error]: could not create index\n" );
1035-            return -1;
1036-        }
1037+        FAIL_IF_ERROR( !idx, "could not create index\n" )
1038         if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
1039-            fprintf( stderr, "ffms [warning]: could not write index file\n" );
1040+            x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
1041     }
1042 
1043     int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
1044-    if( trackno < 0 )
1045-    {
1046-        fprintf( stderr, "ffms [error]: could not find video track\n" );
1047-        return -1;
1048-    }
1049+    FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
1050 
1051     h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
1052-    if( !h->video_source )
1053-    {
1054-        fprintf( stderr, "ffms [error]: could not create video source\n" );
1055-        return -1;
1056-    }
1057+    FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
1058 
1059     h->track = FFMS_GetTrackFromVideo( h->video_source );
1060 
1061@@ -121,11 +111,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1062     h->vfr_input       = info->vfr;
1063 
1064     const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, 0, &e );
1065-    if( !frame )
1066-    {
1067-        fprintf( stderr, "ffms [error]: could not read frame 0\n" );
1068-        return -1;
1069-    }
1070+    FAIL_IF_ERROR( !frame, "could not read frame 0\n" )
1071 
1072     h->init_width  = h->cur_width  = info->width  = frame->EncodedWidth;
1073     h->init_height = h->cur_height = info->height = frame->EncodedHeight;
1074@@ -134,8 +120,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1075     info->tff        = frame->TopFieldFirst;
1076 
1077     if( h->cur_pix_fmt != PIX_FMT_YUV420P )
1078-        fprintf( stderr, "ffms [warning]: converting from %s to YV12\n",
1079-                 avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1080+        x264_cli_log( "ffms", X264_LOG_WARNING, "converting from %s to YV12\n",
1081+                       avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1082 
1083     /* ffms timestamps are in milliseconds. ffms also uses int64_ts for timebase,
1084      * so we need to reduce large timebases to prevent overflow */
1085@@ -173,19 +159,15 @@ static int check_swscale( ffms_hnd_t *h, const FFMS_Frame *frame, int i_frame )
1086     if( h->scaler )
1087     {
1088         sws_freeContext( h->scaler );
1089-        fprintf( stderr, "ffms [warning]: stream properties changed to %dx%d, %s at frame %d  \n", frame->EncodedWidth,
1090-                 frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
1091+        x264_cli_log( "ffms", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d  \n", frame->EncodedWidth,
1092+                      frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
1093         h->cur_width   = frame->EncodedWidth;
1094         h->cur_height  = frame->EncodedHeight;
1095         h->cur_pix_fmt = frame->EncodedPixelFormat;
1096     }
1097     h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
1098                                 PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
1099-    if( !h->scaler )
1100-    {
1101-        fprintf( stderr, "ffms [error]: could not open swscale context\n" );
1102-        return -1;
1103-    }
1104+    FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
1105     return 0;
1106 }
1107 
1108@@ -195,11 +177,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1109     FFMS_ErrorInfo e;
1110     e.BufferSize = 0;
1111     const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, i_frame, &e );
1112-    if( !frame )
1113-    {
1114-        fprintf( stderr, "ffms [error]: could not read frame %d\n", i_frame );
1115-        return -1;
1116-    }
1117+    FAIL_IF_ERROR( !frame, "could not read frame %d\n", i_frame )
1118 
1119     if( check_swscale( h, frame, i_frame ) )
1120         return -1;
1121@@ -214,12 +192,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1122 
1123     if( h->vfr_input )
1124     {
1125-        if( info->PTS == AV_NOPTS_VALUE )
1126-        {
1127-            fprintf( stderr, "ffms [error]: invalid timestamp. "
1128-                     "Use --force-cfr and specify a framerate with --fps\n" );
1129-            return -1;
1130-        }
1131+        FAIL_IF_ERROR( info->PTS == AV_NOPTS_VALUE, "invalid timestamp. "
1132+                       "Use --force-cfr and specify a framerate with --fps\n" )
1133 
1134         if( !h->pts_offset_flag )
1135         {
1136diff --git a/input/input.h b/input/input.h
1137index f89b13b..f588f3c 100644
1138--- a/input/input.h
1139+++ b/input/input.h
1140@@ -25,6 +25,8 @@
1141 #ifndef X264_INPUT_H
1142 #define X264_INPUT_H
1143 
1144+#include "x264cli.h"
1145+
1146 /* options that are used by only some demuxers */
1147 typedef struct
1148 {
1149diff --git a/input/lavf.c b/input/lavf.c
1150index 4b0375f..54a275f 100644
1151--- a/input/lavf.c
1152+++ b/input/lavf.c
1153@@ -21,7 +21,8 @@
1154  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1155  *****************************************************************************/
1156 
1157-#include "muxers.h"
1158+#include "input.h"
1159+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ )
1160 #undef DECLARE_ALIGNED
1161 #include <libavformat/avformat.h>
1162 #include <libswscale/swscale.h>
1163@@ -59,19 +60,15 @@ static int check_swscale( lavf_hnd_t *h, AVCodecContext *c, int i_frame )
1164     if( h->scaler )
1165     {
1166         sws_freeContext( h->scaler );
1167-        fprintf( stderr, "lavf [warning]: stream properties changed to %dx%d, %s at frame %d  \n",
1168-                 c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
1169+        x264_cli_log( "lavf", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d  \n",
1170+                      c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
1171         h->cur_width   = c->width;
1172         h->cur_height  = c->height;
1173         h->cur_pix_fmt = c->pix_fmt;
1174     }
1175     h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
1176                                 PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
1177-    if( !h->scaler )
1178-    {
1179-        fprintf( stderr, "lavf [error]: could not open swscale context\n" );
1180-        return -1;
1181-    }
1182+    FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
1183     return 0;
1184 }
1185 
1186@@ -106,12 +103,12 @@ static int read_frame_internal( x264_picture_t *p_pic, lavf_hnd_t *h, int i_fram
1187             {
1188                 c->reordered_opaque = pkt->pts;
1189                 if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
1190-                    fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
1191+                    x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
1192             }
1193         if( !finished )
1194         {
1195             if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
1196-                fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
1197+                x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
1198             if( !finished )
1199                 return -1;
1200         }
1201@@ -166,26 +163,13 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1202     if( !strcmp( psz_filename, "-" ) )
1203         psz_filename = "pipe:";
1204 
1205-    if( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ) )
1206-    {
1207-        fprintf( stderr, "lavf [error]: could not open input file\n" );
1208-        return -1;
1209-    }
1210-
1211-    if( av_find_stream_info( h->lavf ) < 0 )
1212-    {
1213-        fprintf( stderr, "lavf [error]: could not find input stream info\n" );
1214-        return -1;
1215-    }
1216+    FAIL_IF_ERROR( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ), "could not open input file\n" )
1217+    FAIL_IF_ERROR( av_find_stream_info( h->lavf ) < 0, "could not find input stream info\n" )
1218 
1219     int i = 0;
1220     while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != CODEC_TYPE_VIDEO )
1221         i++;
1222-    if( i == h->lavf->nb_streams )
1223-    {
1224-        fprintf( stderr, "lavf [error]: could not find video stream\n" );
1225-        return -1;
1226-    }
1227+    FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" )
1228     h->stream_id       = i;
1229     h->next_frame      = 0;
1230     h->pts_offset_flag = 0;
1231@@ -207,22 +191,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1232         info->csp |= X264_CSP_VFLIP;
1233 
1234     if( h->cur_pix_fmt != PIX_FMT_YUV420P )
1235-        fprintf( stderr, "lavf [warning]: converting from %s to YV12\n",
1236-                 avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1237-
1238-    if( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ) )
1239-    {
1240-        fprintf( stderr, "lavf [error]: could not find decoder for video stream\n" );
1241-        return -1;
1242-    }
1243+        x264_cli_log( "lavf", X264_LOG_WARNING, "converting from %s to YV12\n",
1244+                      avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1245+    FAIL_IF_ERROR( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ),
1246+                   "could not find decoder for video stream\n" )
1247 
1248     /* prefetch the first frame and set/confirm flags */
1249     h->first_pic = malloc( sizeof(x264_picture_t) );
1250-    if( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ) )
1251-    {
1252-        fprintf( stderr, "lavf [error]: malloc failed\n" );
1253-        return -1;
1254-    }
1255+    FAIL_IF_ERROR( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ),
1256+                   "malloc failed\n" )
1257     else if( read_frame_internal( h->first_pic, h, 0, info ) )
1258         return -1;
1259 
1260diff --git a/input/thread.c b/input/thread.c
1261index c4b07fa..98af22b 100644
1262--- a/input/thread.c
1263+++ b/input/thread.c
1264@@ -21,7 +21,7 @@
1265  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1266  *****************************************************************************/
1267 
1268-#include "muxers.h"
1269+#include "input.h"
1270 
1271 extern cli_input_t input;
1272 
1273@@ -47,11 +47,8 @@ typedef struct thread_input_arg_t
1274 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
1275 {
1276     thread_hnd_t *h = malloc( sizeof(thread_hnd_t) );
1277-    if( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ) )
1278-    {
1279-        fprintf( stderr, "x264 [error]: malloc failed\n" );
1280-        return -1;
1281-    }
1282+    FAIL_IF_ERR( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ),
1283+                 "x264", "malloc failed\n" )
1284     h->input = input;
1285     h->p_handle = *p_handle;
1286     h->next_frame = -1;
1287diff --git a/input/timecode.c b/input/timecode.c
1288index a307327..7821e76 100644
1289--- a/input/timecode.c
1290+++ b/input/timecode.c
1291@@ -20,7 +20,8 @@
1292  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1293  *****************************************************************************/
1294 
1295-#include "muxers.h"
1296+#include "input.h"
1297+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "timecode", __VA_ARGS__ )
1298 #include <math.h>
1299 
1300 extern cli_input_t input;
1301@@ -61,12 +62,8 @@ static double correct_fps( double fps, timecode_hnd_t *h )
1302     {
1303         fps_den = i * h->timebase_num;
1304         fps_num = round( fps_den * fps_sig ) * exponent;
1305-        if( fps_num > UINT32_MAX )
1306-        {
1307-            fprintf( stderr, "timecode [error]: tcfile fps correction failed.\n"
1308-                             "                  Specify an appropriate timebase manually or remake tcfile.\n" );
1309-            return -1;
1310-        }
1311+        FAIL_IF_ERROR( fps_num > UINT32_MAX, "tcfile fps correction failed.\n"
1312+                       "                  Specify an appropriate timebase manually or remake tcfile.\n" )
1313         if( fabs( ((double)fps_num / fps_den) / exponent - fps_sig ) < DOUBLE_EPSILON )
1314             break;
1315         ++i;
1316@@ -91,12 +88,8 @@ static int try_mkv_timebase_den( double *fpss, timecode_hnd_t *h, int loop_num )
1317         double fps_sig = sigexp10( fpss[num], &exponent );
1318         fps_den = round( MKV_TIMEBASE_DEN / fps_sig ) / exponent;
1319         h->timebase_num = fps_den && h->timebase_num ? gcd( h->timebase_num, fps_den ) : fps_den;
1320-        if( h->timebase_num > UINT32_MAX || !h->timebase_num )
1321-        {
1322-            fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
1323-                             "                  Specify timebase manually.\n" );
1324-            return -1;
1325-        }
1326+        FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || !h->timebase_num, "automatic timebase generation failed.\n"
1327+                       "                  Specify timebase manually.\n" )
1328     }
1329     return 0;
1330 }
1331@@ -110,11 +103,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1332     double *fpss = NULL;
1333 
1334     ret = fscanf( tcfile_in, "# timecode format v%d", &tcfv );
1335-    if( ret != 1 || (tcfv != 1 && tcfv != 2) )
1336-    {
1337-        fprintf( stderr, "timecode [error]: unsupported timecode format\n" );
1338-        return -1;
1339-    }
1340+    FAIL_IF_ERROR( ret != 1 || (tcfv != 1 && tcfv != 2), "unsupported timecode format\n" )
1341 
1342     if( tcfv == 1 )
1343     {
1344@@ -128,18 +117,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1345         {
1346             if( buff[0] == '#' || buff[0] == '\n' || buff[0] == '\r' )
1347                 continue;
1348-            if( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1 )
1349-            {
1350-                fprintf( stderr, "timecode [error]: tcfile parsing error: assumed fps not found\n" );
1351-                return -1;
1352-            }
1353+            FAIL_IF_ERROR( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1,
1354+                           "tcfile parsing error: assumed fps not found\n" )
1355             break;
1356         }
1357-        if( h->assume_fps <= 0 )
1358-        {
1359-            fprintf( stderr, "timecode [error]: invalid assumed fps %.6f\n", h->assume_fps );
1360-            return -1;
1361-        }
1362+        FAIL_IF_ERROR( h->assume_fps <= 0, "invalid assumed fps %.6f\n", h->assume_fps )
1363 
1364         file_pos = ftell( tcfile_in );
1365         h->stored_pts_num = 0;
1366@@ -152,16 +134,9 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1367                 continue;
1368             }
1369             ret = sscanf( buff, "%d,%d,%lf", &start, &end, &seq_fps );
1370-            if( ret != 3 && ret != EOF )
1371-            {
1372-                fprintf( stderr, "timecode [error]: invalid input tcfile\n" );
1373-                return -1;
1374-            }
1375-            if( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0 )
1376-            {
1377-                fprintf( stderr, "timecode [error]: invalid input tcfile at line %d: %s\n", num, buff );
1378-                return -1;
1379-            }
1380+            FAIL_IF_ERROR( ret != 3 && ret != EOF, "invalid input tcfile\n" )
1381+            FAIL_IF_ERROR( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0,
1382+                           "invalid input tcfile at line %d: %s\n", num, buff )
1383             prev_start = start;
1384             prev_end = end;
1385             if( h->auto_timebase_den || h->auto_timebase_num )
1386@@ -259,11 +234,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1387             ++num;
1388         }
1389         timecodes_num = h->stored_pts_num + h->seek;
1390-        if( !timecodes_num )
1391-        {
1392-            fprintf( stderr, "timecode [error]: input tcfile doesn't have any timecodes!\n" );
1393-            return -1;
1394-        }
1395+        FAIL_IF_ERROR( !timecodes_num, "input tcfile doesn't have any timecodes!\n" )
1396         fseek( tcfile_in, file_pos, SEEK_SET );
1397 
1398         timecodes = malloc( timecodes_num * sizeof(double) );
1399@@ -272,11 +243,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1400 
1401         fgets( buff, sizeof(buff), tcfile_in );
1402         ret = sscanf( buff, "%lf", &timecodes[0] );
1403         if( ret != 1 )
1404         {
1405-            fprintf( stderr, "timecode [error]: invalid input tcfile for frame 0\n" );
1406+            x264_cli_log( "timecode", X264_LOG_ERROR, "invalid input tcfile for frame 0\n" );
1407             goto fail;
1408         }
1409         for( num = 1; num < timecodes_num; )
1410         {
1411             fgets( buff, sizeof(buff), tcfile_in );
1412@@ -284,11 +251,12 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1413                 continue;
1414             ret = sscanf( buff, "%lf", &timecodes[num] );
1415             timecodes[num] *= 1e-3;         /* Timecode format v2 is expressed in milliseconds. */
1416             if( ret != 1 || timecodes[num] <= timecodes[num - 1] )
1417             {
1418-                fprintf( stderr, "timecode [error]: invalid input tcfile for frame %d\n", num );
1419+                x264_cli_log( "timecode", X264_LOG_ERROR,
1420+                              "invalid input tcfile for frame %d\n", num );
1421                 goto fail;
1422             }
1423             ++num;
1424         }
1425 
1426@@ -342,14 +306,14 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1427         uint64_t i = gcd( h->timebase_num, h->timebase_den );
1428         h->timebase_num /= i;
1429         h->timebase_den /= i;
1430-        fprintf( stderr, "timecode [info]: automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
1431+        x264_cli_log( "timecode", X264_LOG_INFO, "automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
1432     }
1433     else if( h->timebase_den > UINT32_MAX || !h->timebase_den )
1434     {
1435-        fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
1436-                         "                  Specify an appropriate timebase manually.\n" );
1437+        x264_cli_log( "timecode", X264_LOG_ERROR, "automatic timebase generation failed.\n"
1438+                      "                  Specify an appropriate timebase manually.\n" );
1439         goto fail;
1440     }
1441 
1442     h->pts = malloc( h->stored_pts_num * sizeof(int64_t) );
1443     if( !h->pts )
1444@@ -360,11 +320,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1445     {
1446         h->pts[num] = (int64_t)( timecodes[h->seek + num] * ((double)h->timebase_den / h->timebase_num) + 0.5 );
1447         h->pts[num] -= pts_seek_offset;
1448         if( h->pts[num] <= h->pts[num - 1] )
1449         {
1450-            fprintf( stderr, "timecode [error]: invalid timebase or timecode for frame %d\n", num );
1451+            x264_cli_log( "timecode", X264_LOG_ERROR, "invalid timebase or timecode for frame %d\n", num );
1452             goto fail;
1453         }
1454     }
1455 
1456     free( timecodes );
1457@@ -386,11 +342,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1458     int ret = 0;
1459     FILE *tcfile_in;
1460     timecode_hnd_t *h = malloc( sizeof(timecode_hnd_t) );
1461-    if( !h )
1462-    {
1463-        fprintf( stderr, "timecode [error]: malloc failed\n" );
1464-        return -1;
1465-    }
1466+    FAIL_IF_ERROR( !h, "malloc failed\n" )
1467     h->input = input;
1468     h->p_handle = *p_handle;
1469     h->frame_total = input.get_frame_total( h->p_handle );
1470@@ -400,11 +352,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1471         ret = sscanf( opt->timebase, "%"SCNu64"/%"SCNu64, &h->timebase_num, &h->timebase_den );
1472         if( ret == 1 )
1473             h->timebase_num = strtoul( opt->timebase, NULL, 10 );
1474-        if( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX )
1475-        {
1476-            fprintf( stderr, "timecode [error]: timebase you specified exceeds H.264 maximum\n" );
1477-            return -1;
1478-        }
1479+        FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX,
1480+                       "timebase you specified exceeds H.264 maximum\n" )
1481     }
1482     h->auto_timebase_num = !ret;
1483     h->auto_timebase_den = ret < 2;
1484@@ -418,14 +367,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1485     *p_handle = h;
1486 
1487     tcfile_in = fopen( psz_filename, "rb" );
1488-    if( !tcfile_in )
1489-    {
1490-        fprintf( stderr, "timecode [error]: can't open `%s'\n", psz_filename );
1491-        return -1;
1492-    }
1493+    FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
1494     else if( !x264_is_regular_file( tcfile_in ) )
1495     {
1496-        fprintf( stderr, "timecode [error]: tcfile input incompatible with non-regular file `%s'\n", psz_filename );
1497+        x264_cli_log( "timecode", X264_LOG_ERROR, "tcfile input incompatible with non-regular file `%s'\n", psz_filename );
1498         fclose( tcfile_in );
1499         return -1;
1500     }
1501@@ -466,8 +411,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1502     {
1503         if( h->pts )
1504         {
1505-            fprintf( stderr, "timecode [info]: input timecode file missing data for frame %d and later\n"
1506-                             "                 assuming constant fps %.6f\n", i_frame, h->assume_fps );
1507+            x264_cli_log( "timecode", X264_LOG_INFO, "input timecode file missing data for frame %d and later\n"
1508+                          "                 assuming constant fps %.6f\n", i_frame, h->assume_fps );
1509             free( h->pts );
1510             h->pts = NULL;
1511         }
1512diff --git a/input/y4m.c b/input/y4m.c
1513index fd42140..9b39d2f 100644
1514--- a/input/y4m.c
1515+++ b/input/y4m.c
1516@@ -21,7 +21,8 @@
1517  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1518  *****************************************************************************/
1519 
1520-#include "muxers.h"
1521+#include "input.h"
1522+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "y4m", __VA_ARGS__ )
1523 
1524 typedef struct
1525 {
1526@@ -162,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1527     if( colorspace == X264_CSP_NONE )
1528         colorspace = X264_CSP_I420;
1529 
1530-    if( colorspace != X264_CSP_I420 )
1531-    {
1532-        fprintf( stderr, "y4m [error]: colorspace unhandled\n" );
1533-        return -1;
1534-    }
1535+    FAIL_IF_ERROR( colorspace != X264_CSP_I420, "colorspace unhandled\n" )
1536 
1537     *p_handle = h;
1538     return 0;
1539@@ -202,21 +199,13 @@ static int read_frame_internal( x264_picture_t *p_pic, y4m_hnd_t *h )
1540         return -1;
1541 
1542     header[slen] = 0;
1543-    if( strncmp( header, Y4M_FRAME_MAGIC, slen ) )
1544-    {
1545-        fprintf( stderr, "y4m [error]: bad header magic (%"PRIx32" <=> %s)\n",
1546-                 M32(header), header );
1547-        return -1;
1548-    }
1549+    FAIL_IF_ERROR( strncmp( header, Y4M_FRAME_MAGIC, slen ), "bad header magic (%"PRIx32" <=> %s)\n",
1550+                   M32(header), header )
1551 
1552     /* Skip most of it */
1553     while( i < MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
1554         i++;
1555-    if( i == MAX_FRAME_HEADER )
1556-    {
1557-        fprintf( stderr, "y4m [error]: bad frame header!\n" );
1558-        return -1;
1559-    }
1560+    FAIL_IF_ERROR( i == MAX_FRAME_HEADER, "bad frame header!\n" )
1561     h->frame_header_len = i+slen+1;
1562 
1563     if( fread( p_pic->img.plane[0], h->width * h->height, 1, h->fh ) <= 0
1564diff --git a/input/yuv.c b/input/yuv.c
1565index cbed7fc..613662c 100644
1566--- a/input/yuv.c
1567+++ b/input/yuv.c
1568@@ -21,7 +21,7 @@
1569  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1570  *****************************************************************************/
1571 
1572-#include "muxers.h"
1573+#include "input.h"
1574 
1575 typedef struct
1576 {
1577@@ -45,11 +45,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1578     }
1579     else
1580         sscanf( opt->resolution, "%ux%u", &info->width, &info->height );
1581-    if( !info->width || !info->height )
1582-    {
1583-        fprintf( stderr, "yuv [error]: rawyuv input requires a resolution.\n" );
1584-        return -1;
1585-    }
1586+    FAIL_IF_ERR( !info->width || !info->height, "yuv", "rawyuv input requires a resolution.\n" )
1587 
1588     h->next_frame = 0;
1589     info->vfr     = 0;
1590diff --git a/muxers.h b/muxers.h
1591deleted file mode 100644
1592index b309320..0000000
1593--- a/muxers.h
1594+++ /dev/null
1595@@ -1,61 +0,0 @@
1596-/*****************************************************************************
1597- * muxers.h: h264 file i/o modules
1598- *****************************************************************************
1599- * Copyright (C) 2003-2009 x264 project
1600- *
1601- * Authors: Laurent Aimar <fenrir@via.ecp.fr>
1602- *          Loren Merritt <lorenm@u.washington.edu>
1603- *
1604- * This program is free software; you can redistribute it and/or modify
1605- * it under the terms of the GNU General Public License as published by
1606- * the Free Software Foundation; either version 2 of the License, or
1607- * (at your option) any later version.
1608- *
1609- * This program is distributed in the hope that it will be useful,
1610- * but WITHOUT ANY WARRANTY; without even the implied warranty of
1611- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1612- * GNU General Public License for more details.
1613- *
1614- * You should have received a copy of the GNU General Public License
1615- * along with this program; if not, write to the Free Software
1616- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1617- *****************************************************************************/
1618-
1619-#ifndef X264_MUXERS_H
1620-#define X264_MUXERS_H
1621-
1622-#include "common/common.h"
1623-#include "x264.h"
1624-
1625-typedef void *hnd_t;
1626-
1627-static inline int64_t gcd( int64_t a, int64_t b )
1628-{
1629-    while( 1 )
1630-    {
1631-        int64_t c = a % b;
1632-        if( !c )
1633-            return b;
1634-        a = b;
1635-        b = c;
1636-    }
1637-}
1638-
1639-static inline int64_t lcm( int64_t a, int64_t b )
1640-{
1641-    return ( a / gcd( a, b ) ) * b;
1642-}
1643-
1644-static inline char *get_filename_extension( char *filename )
1645-{
1646-    char *ext = filename + strlen( filename );
1647-    while( *ext != '.' && ext > filename )
1648-        ext--;
1649-    ext += *ext == '.';
1650-    return ext;
1651-}
1652-
1653-#include "input/input.h"
1654-#include "output/output.h"
1655-
1656-#endif
1657diff --git a/output/flv.c b/output/flv.c
1658index e441b6d..9831a5b 100644
1659--- a/output/flv.c
1660+++ b/output/flv.c
1661@@ -18,7 +18,7 @@
1662  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1663  *****************************************************************************/
1664 
1665-#include "muxers.h"
1666+#include "output.h"
1667 #include "flv_bytestream.h"
1668 
1669 #define CHECK(x)\
1670@@ -223,14 +223,14 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
1671         if( prev_dts == dts )
1672         {
1673             double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
1674-            fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
1675-                             "               current internal decoding framerate: %.6f fps\n", dts, fps );
1676+            x264_cli_log( "flv", X264_LOG_WARNING, "duplicate DTS %"PRId64" generated by rounding\n"
1677+                          "               current internal decoding framerate: %.6f fps\n", dts, fps );
1678         }
1679         if( prev_cts == cts )
1680         {
1681             double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
1682-            fprintf( stderr, "flv [warning]: duplicate CTS %"PRId64" generated by rounding\n"
1683-                             "               current internal composition framerate: %.6f fps\n", cts, fps );
1684+            x264_cli_log( "flv", X264_LOG_WARNING, "duplicate CTS %"PRId64" generated by rounding\n"
1685+                          "               current internal composition framerate: %.6f fps\n", cts, fps );
1686         }
1687     }
1688     p_flv->i_prev_dts = p_picture->i_dts;
1689diff --git a/output/flv_bytestream.c b/output/flv_bytestream.c
1690index 316114c..e02476c 100644
1691--- a/output/flv_bytestream.c
1692+++ b/output/flv_bytestream.c
1693@@ -18,7 +18,7 @@
1694  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1695  *****************************************************************************/
1696 
1697-#include "muxers.h"
1698+#include "output.h"
1699 #include "flv_bytestream.h"
1700 
1701 uint64_t dbl2int( double value )
1702diff --git a/output/matroska.c b/output/matroska.c
1703index 0304c84..a1219d0 100644
1704--- a/output/matroska.c
1705+++ b/output/matroska.c
1706@@ -18,7 +18,7 @@
1707  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1708  *****************************************************************************/
1709 
1710-#include "muxers.h"
1711+#include "output.h"
1712 #include "matroska_ebml.h"
1713 
1714 typedef struct
1715diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
1716index 31b62f8..adfcaa8 100644
1717--- a/output/matroska_ebml.c
1718+++ b/output/matroska_ebml.c
1719@@ -18,7 +18,7 @@
1720  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1721  *****************************************************************************/
1722 
1723-#include "muxers.h"
1724+#include "output.h"
1725 #include "matroska_ebml.h"
1726 
1727 #define CLSIZE 1048576
1728diff --git a/output/mp4.c b/output/mp4.c
1729index 0e3c2fc..f2ff5be 100644
1730--- a/output/mp4.c
1731+++ b/output/mp4.c
1732@@ -21,7 +21,7 @@
1733  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1734  *****************************************************************************/
1735 
1736-#include "muxers.h"
1737+#include "output.h"
1738 #include <gpac/isomedia.h>
1739 
1740 #if HAVE_GF_MALLOC
1741@@ -61,12 +61,12 @@ static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
1742 
1743     timescale = gf_isom_get_media_timescale( p_file, i_track );
1744     count = gf_isom_get_sample_count( p_file, i_track );
1745-    for( int i = 0; i < count; i++ )
1746+    for( u32 i = 0; i < count; i++ )
1747     {
1748         GF_ISOSample *samp = gf_isom_get_sample_info( p_file, i_track, i+1, &di, &offset );
1749         if( !samp )
1750         {
1751-            fprintf( stderr, "mp4 [error]: failure reading back frame %u\n", i );
1752+            x264_cli_log( "mp4", X264_LOG_ERROR, "failure reading back frame %u\n", i );
1753             break;
1754         }
1755 
1756@@ -163,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
1757     FILE *fh = fopen( psz_filename, "w" );
1758     if( !fh )
1759         return -1;
1760-    else if( !x264_is_regular_file( fh ) )
1761-    {
1762-        fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
1763-        return -1;
1764-    }
1765+    FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
1766     fclose( fh );
1767 
1768     if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
1769diff --git a/output/output.h b/output/output.h
1770index c79b48e..094fefc 100644
1771--- a/output/output.h
1772+++ b/output/output.h
1773@@ -24,6 +24,8 @@
1774 #ifndef X264_OUTPUT_H
1775 #define X264_OUTPUT_H
1776 
1777+#include "x264cli.h"
1778+
1779 typedef struct
1780 {
1781     int (*open_file)( char *psz_filename, hnd_t *p_handle );
1782diff --git a/output/raw.c b/output/raw.c
1783index 02e4c56..fc418fb 100644
1784--- a/output/raw.c
1785+++ b/output/raw.c
1786@@ -21,7 +21,7 @@
1787  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
1788  *****************************************************************************/
1789 
1790-#include "muxers.h"
1791+#include "output.h"
1792 
1793 static int open_file( char *psz_filename, hnd_t *p_handle )
1794 {
1795diff --git a/x264.c b/x264.c
1796index f08ab41..741570c 100644
1797--- a/x264.c
1798+++ b/x264.c
1799@@ -31,9 +31,11 @@
1800 #include <getopt.h>
1801 
1802 #include "common/common.h"
1803-#include "common/cpu.h"
1804-#include "x264.h"
1805-#include "muxers.h"
1806+#include "x264cli.h"
1807+#include "input/input.h"
1808+#include "output/output.h"
1809+
1810+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
1811 
1812 #ifdef _WIN32
1813 #include <windows.h>
1814@@ -96,6 +98,7 @@ static const char * const muxer_names[] =
1815 };
1816 
1817 static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
1818+static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
1819 
1820 typedef struct{
1821     int mod;
1822@@ -141,6 +144,48 @@ static void Help( x264_param_t *defaults, int longhelp );
1823 static int  Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt );
1824 static int  Encode( x264_param_t *param, cli_opt_t *opt );
1825 
1826+/* logging and printing for within the cli system */
1827+static int cli_log_level;
1828+void x264_cli_log( const char *name, int i_level, const char *fmt, ... )
1829+{ /* Print "<name> [<level>]: " followed by the printf-style message to stderr. */
1830+    if( i_level > cli_log_level ) /* message is above the configured CLI verbosity: drop it */
1831+        return;
1832+    const char *s_level; /* only ever points at string literals, hence const */
1833+    switch( i_level )
1834+    {
1835+        case X264_LOG_ERROR:
1836+            s_level = "error";
1837+            break;
1838+        case X264_LOG_WARNING:
1839+            s_level = "warning";
1840+            break;
1841+        case X264_LOG_INFO:
1842+            s_level = "info";
1843+            break;
1844+        case X264_LOG_DEBUG:
1845+            s_level = "debug";
1846+            break;
1847+        default: /* any level without a name above still gets printed, tagged "unknown" */
1848+            s_level = "unknown";
1849+            break;
1850+    }
1851+    fprintf( stderr, "%s [%s]: ", name, s_level ); /* prefix first, then the caller's message */
1852+    va_list arg;
1853+    va_start( arg, fmt );
1854+    vfprintf( stderr, fmt, arg );
1855+    va_end( arg );
1856+}
1857+
1858+void x264_cli_printf( int i_level, const char *fmt, ... )
1859+{ /* Print the printf-style message to stderr as-is (no "name [level]: " prefix). */
1860+    if( i_level > cli_log_level ) /* message is above the configured CLI verbosity: drop it */
1861+        return;
1862+    va_list arg;
1863+    va_start( arg, fmt );
1864+    vfprintf( stderr, fmt, arg );
1865+    va_end( arg );
1866+}
1867+
1868 /****************************************************************************
1869  * main:
1870  ****************************************************************************/
1871@@ -571,6 +616,9 @@ static void Help( x264_param_t *defaults, int longhelp )
1872     H1( "  -v, --verbose               Print stats for each frame\n" );
1873     H1( "      --no-progress           Don't show the progress indicator while encoding\n" );
1874     H0( "      --quiet                 Quiet Mode\n" );
1875+    H1( "      --log-level <string>    Specify the maximum level of logging [\"%s\"]\n"
1876+        "                                  - %s\n", strtable_lookup( log_level_names, cli_log_level - X264_LOG_NONE ),
1877+                                       stringify_names( buf, log_level_names ) );
1878     H1( "      --psnr                  Enable PSNR computation\n" );
1879     H1( "      --ssim                  Enable SSIM computation\n" );
1880     H1( "      --threads <integer>     Force a specific number of threads\n" );
1881@@ -616,6 +664,7 @@ enum {
1882     OPT_TCFILE_OUT,
1883     OPT_TIMEBASE,
1884     OPT_PULLDOWN,
1885+    OPT_LOG_LEVEL
1886 } OptionsOPT;
1887 
1888 static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
1889@@ -729,6 +778,7 @@ static struct option long_options[] =
1890     { "ssim",              no_argument, NULL, 0 },
1891     { "quiet",             no_argument, NULL, OPT_QUIET },
1892     { "verbose",           no_argument, NULL, 'v' },
1893+    { "log-level",   required_argument, NULL, OPT_LOG_LEVEL },
1894     { "no-progress",       no_argument, NULL, OPT_NOPROGRESS },
1895     { "visualize",         no_argument, NULL, OPT_VISUALIZE },
1896     { "dump-yuv",    required_argument, NULL, 0 },
1897@@ -780,11 +830,11 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
1898         param->b_repeat_headers = 0;
1899         if( param->i_nal_hrd == X264_NAL_HRD_CBR )
1900         {
1901-            fprintf( stderr, "x264 [warning]: cbr nal-hrd is not compatible with mp4\n" );
1902+            x264_cli_log( "x264", X264_LOG_WARNING, "cbr nal-hrd is not compatible with mp4\n" );
1903             param->i_nal_hrd = X264_NAL_HRD_VBR;
1904         }
1905 #else
1906-        fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
1907+        x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with MP4 output support\n" );
1908         return -1;
1909 #endif
1910     }
1911@@ -833,7 +883,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
1912         input = avs_input;
1913         module = "avs";
1914 #else
1915-        fprintf( stderr, "x264 [error]: not compiled with AVS input support\n" );
1916+        x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with AVS input support\n" );
1917         return -1;
1918 #endif
1919     }
1920@@ -877,11 +927,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
1921             input = yuv_input;
1922         }
1923 
1924-        if( !(*p_handle) )
1925-        {
1926-            fprintf( stderr, "x264 [error]: could not open input file `%s' via any method!\n", filename );
1927-            return -1;
1928-        }
1929+        FAIL_IF_ERROR( !(*p_handle), "could not open input file `%s' via any method!\n", filename )
1930     }
1931     strcpy( used_demuxer, module );
1932 
1933@@ -932,6 +978,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1934     char *tune = NULL;
1935 
1936     x264_param_default( &defaults );
1937+    cli_log_level = defaults.i_log_level;
1938 
1939     memset( opt, 0, sizeof(cli_opt_t) );
1940     memset( &input_opt, 0, sizeof(cli_input_opt_t) );
1941@@ -1004,32 +1051,20 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1942                 output_filename = optarg;
1943                 break;
1944             case OPT_MUXER:
1945-                if( parse_enum_name( optarg, muxer_names, &muxer ) < 0 )
1946-                {
1947-                    fprintf( stderr, "x264 [error]: Unknown muxer `%s'\n", optarg );
1948-                    return -1;
1949-                }
1950+                FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg )
1951                 break;
1952             case OPT_DEMUXER:
1953-                if( parse_enum_name( optarg, demuxer_names, &demuxer ) < 0 )
1954-                {
1955-                    fprintf( stderr, "x264 [error]: Unknown demuxer `%s'\n", optarg );
1956-                    return -1;
1957-                }
1958+                FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg )
1959                 break;
1960             case OPT_INDEX:
1961                 input_opt.index_file = optarg;
1962                 break;
1963             case OPT_QPFILE:
1964                 opt->qpfile = fopen( optarg, "rb" );
1965-                if( !opt->qpfile )
1966-                {
1967-                    fprintf( stderr, "x264 [error]: can't open qpfile `%s'\n", optarg );
1968-                    return -1;
1969-                }
1970-                else if( !x264_is_regular_file( opt->qpfile ) )
1971+                FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
1972+                if( !x264_is_regular_file( opt->qpfile ) )
1973                 {
1974-                    fprintf( stderr, "x264 [error]: qpfile incompatible with non-regular file `%s'\n", optarg );
1975+                    x264_cli_log( "x264", X264_LOG_ERROR, "qpfile incompatible with non-regular file `%s'\n", optarg );
1976                     fclose( opt->qpfile );
1977                     return -1;
1978                 }
1979@@ -1038,11 +1073,17 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1980                 b_thread_input = 1;
1981                 break;
1982             case OPT_QUIET:
1983-                param->i_log_level = X264_LOG_NONE;
1984+                cli_log_level = param->i_log_level = X264_LOG_NONE;
1985                 break;
1986             case 'v':
1987-                param->i_log_level = X264_LOG_DEBUG;
1988+                cli_log_level = param->i_log_level = X264_LOG_DEBUG;
1989                 break;
1990+            case OPT_LOG_LEVEL: /* --log-level: sets both the CLI and the encoder log level */
1991+                if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) )
1992+                    param->i_log_level = cli_log_level += X264_LOG_NONE; /* table index -> X264_LOG_* value */
1993+                else
1994+                    param->i_log_level = cli_log_level = atoi( optarg ); /* not in the table: read as an integer */
1995+                break; /* must not fall through into OPT_NOPROGRESS and disable the progress indicator */
1996             case OPT_NOPROGRESS:
1997                 opt->b_progress = 0;
1998                 break;
1999@@ -1051,7 +1092,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
2000                 param->b_visualize = 1;
2001                 b_exit_on_ctrl_c = 1;
2002 #else
2003-                fprintf( stderr, "x264 [warning]: not compiled with visualization support\n" );
2004+                x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
2005 #endif
2006                 break;
2007             case OPT_TUNE:
2008@@ -1078,18 +1119,13 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
2009                 break;
2010             case OPT_TCFILE_OUT:
2011                 opt->tcfile_out = fopen( optarg, "wb" );
2012-                if( !opt->tcfile_out )
2013-                {
2014-                    fprintf( stderr, "x264 [error]: can't open `%s'\n", optarg );
2015-                    return -1;
2016-                }
2017+                FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
2018                 break;
2019             case OPT_TIMEBASE:
2020                 input_opt.timebase = optarg;
2021                 break;
2022             case OPT_PULLDOWN:
2023-                if( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ) < 0 )
2024-                    return -1;
2025+                FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg )
2026                 break;
2027             default:
2028 generic_option:
2029@@ -1116,7 +1152,7 @@ generic_option:
2030         if( b_error )
2031         {
2032             const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind-2];
2033-            fprintf( stderr, "x264 [error]: invalid argument: %s = %s\n", name, optarg );
2034+            x264_cli_log( "x264", X264_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg );
2035             return -1;
2036         }
2037     }
2038@@ -1130,20 +1166,12 @@ generic_option:
2039         return -1;
2040 
2041     /* Get the file name */
2042-    if( optind > argc - 1 || !output_filename )
2043-    {
2044-        fprintf( stderr, "x264 [error]: No %s file. Run x264 --help for a list of options.\n",
2045-                 optind > argc - 1 ? "input" : "output" );
2046-        return -1;
2047-    }
2048+    FAIL_IF_ERROR( optind > argc - 1 || !output_filename, "No %s file. Run x264 --help for a list of options.\n",
2049+                   optind > argc - 1 ? "input" : "output" )
2050 
2051     if( select_output( muxer, output_filename, param ) )
2052         return -1;
2053-    if( output.open_file( output_filename, &opt->hout ) )
2054-    {
2055-        fprintf( stderr, "x264 [error]: could not open output file `%s'\n", output_filename );
2056-        return -1;
2057-    }
2058+    FAIL_IF_ERROR( output.open_file( output_filename, &opt->hout ), "could not open output file `%s'\n", output_filename )
2059 
2060     input_filename = argv[optind++];
2061     input_opt.resolution = optind < argc ? argv[optind++] : NULL;
2062@@ -1163,39 +1191,22 @@ generic_option:
2063     if( select_input( demuxer, demuxername, input_filename, &opt->hin, &info, &input_opt ) )
2064         return -1;
2065 
2066-    if( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ) )
2067-    {
2068-        fprintf( stderr, "x264 [error]: could not open input file `%s'\n", input_filename );
2069-        return -1;
2070-    }
2071+    FAIL_IF_ERROR( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ),
2072+                   "could not open input file `%s'\n", input_filename )
2073 
2074     x264_reduce_fraction( &info.sar_width, &info.sar_height );
2075     x264_reduce_fraction( &info.fps_num, &info.fps_den );
2076-    if( param->i_log_level >= X264_LOG_INFO )
2077-        fprintf( stderr, "%s [info]: %dx%d%c %d:%d @ %d/%d fps (%cfr)\n", demuxername, info.width,
2078-                 info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
2079-                 info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
2080+    x264_cli_log( demuxername, X264_LOG_INFO, "%dx%d%c %d:%d @ %d/%d fps (%cfr)\n", info.width,
2081+                  info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
2082+                  info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
2083 
2084     if( tcfile_name )
2085     {
2086-        if( b_user_fps )
2087-        {
2088-            fprintf( stderr, "x264 [error]: --fps + --tcfile-in is incompatible.\n" );
2089-            return -1;
2090-        }
2091-        if( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ) )
2092-        {
2093-            fprintf( stderr, "x264 [error]: timecode input failed\n" );
2094-            return -1;
2095-        }
2096-        else
2097-            input = timecode_input;
2098-    }
2099-    else if( !info.vfr && input_opt.timebase )
2100-    {
2101-        fprintf( stderr, "x264 [error]: --timebase is incompatible with cfr input\n" );
2102-        return -1;
2103+        FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" )
2104+        FAIL_IF_ERROR( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ), "timecode input failed\n" )
2105+        input = timecode_input;
2106     }
2107+    else FAIL_IF_ERROR( !info.vfr && input_opt.timebase, "--timebase is incompatible with cfr input\n" )
2108 
2109     /* set param flags from the info flags as necessary */
2110     param->i_csp       = info.csp;
2111@@ -1204,9 +1215,9 @@ generic_option:
2112     param->i_width     = info.width;
2113     if( !b_user_interlaced && info.interlaced )
2114     {
2115-        fprintf( stderr, "x264 [warning]: input appears to be interlaced, enabling %cff interlaced mode.\n"
2116-                         "                If you want otherwise, use --no-interlaced or --%cff\n",
2117-                 info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
2118+        x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, enabling %cff interlaced mode.\n"
2119+                      "                If you want otherwise, use --no-interlaced or --%cff\n",
2120+                      info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
2121         param->b_interlaced = 1;
2122         param->b_tff = !!info.tff;
2123     }
2124@@ -1230,21 +1241,14 @@ generic_option:
2125         uint64_t i_user_timebase_num;
2126         uint64_t i_user_timebase_den;
2127         int ret = sscanf( input_opt.timebase, "%"SCNu64"/%"SCNu64, &i_user_timebase_num, &i_user_timebase_den );
2128-        if( !ret )
2129-        {
2130-            fprintf( stderr, "x264 [error]: invalid argument: timebase = %s\n", input_opt.timebase );
2131-            return -1;
2132-        }
2133+        FAIL_IF_ERROR( !ret, "invalid argument: timebase = %s\n", input_opt.timebase )
2134         else if( ret == 1 )
2135         {
2136             i_user_timebase_num = param->i_timebase_num;
2137             i_user_timebase_den = strtoul( input_opt.timebase, NULL, 10 );
2138         }
2139-        if( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX )
2140-        {
2141-            fprintf( stderr, "x264 [error]: timebase you specified exceeds H.264 maximum\n" );
2142-            return -1;
2143-        }
2144+        FAIL_IF_ERROR( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX,
2145+                       "timebase you specified exceeds H.264 maximum\n" )
2146         opt->timebase_convert_multiplier = ((double)i_user_timebase_den / param->i_timebase_den)
2147                                          * ((double)param->i_timebase_num / i_user_timebase_num);
2148         param->i_timebase_num = i_user_timebase_num;
2149@@ -1261,13 +1265,8 @@ generic_option:
2150     if( b_thread_input || param->i_threads > 1
2151         || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1) )
2152     {
2153-        if( thread_input.open_file( NULL, &opt->hin, &info, NULL ) )
2154-        {
2155-            fprintf( stderr, "x264 [error]: threaded input failed\n" );
2156-            return -1;
2157-        }
2158-        else
2159-            input = thread_input;
2160+        FAIL_IF_ERROR( thread_input.open_file( NULL, &opt->hin, &info, NULL ), "threaded input failed\n" )
2161+        input = thread_input;
2162     }
2163 #endif
2164 
2165@@ -1321,7 +1320,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
2166         else ret = 0;
2167         if( ret != 3 || qp < -1 || qp > 51 )
2168         {
2169-            fprintf( stderr, "x264 [error]: can't parse qpfile for frame %d\n", i_frame );
2170+            x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
2171             fclose( opt->qpfile );
2172             opt->qpfile = NULL;
2173             pic->i_type = X264_TYPE_AUTO;
2174@@ -1344,11 +1343,7 @@ static int  Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *l
2175 
2176     i_frame_size = x264_encoder_encode( h, &nal, &i_nal, pic, &pic_out );
2177 
2178-    if( i_frame_size < 0 )
2179-    {
2180-        fprintf( stderr, "x264 [error]: x264_encoder_encode failed\n" );
2181-        return -1;
2182-    }
2183+    FAIL_IF_ERROR( i_frame_size < 0, "x264_encoder_encode failed\n" );
2184 
2185     if( i_frame_size )
2186     {
2187@@ -1424,17 +1419,14 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
2188         param->b_pic_struct = 1;
2189         pulldown = &pulldown_values[opt->i_pulldown];
2190         param->i_timebase_num = param->i_fps_den;
2191-        if( fmod( param->i_fps_num * pulldown->fps_factor, 1 ) )
2192-        {
2193-            fprintf( stderr, "x264 [error]: unsupported framerate for chosen pulldown\n" );
2194-            return -1;
2195-        }
2196+        FAIL_IF_ERROR( fmod( param->i_fps_num * pulldown->fps_factor, 1 ),
2197+                       "unsupported framerate for chosen pulldown\n" )
2198         param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
2199     }
2200 
2201     if( ( h = x264_encoder_open( param ) ) == NULL )
2202     {
2203-        fprintf( stderr, "x264 [error]: x264_encoder_open failed\n" );
2204+        x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
2205         input.close_file( opt->hin );
2206         return -1;
2207     }
2208@@ -1445,27 +1437,19 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
2209 
2210     if( output.set_param( opt->hout, param ) )
2211     {
2212-        fprintf( stderr, "x264 [error]: can't set outfile param\n" );
2213+        x264_cli_log( "x264", X264_LOG_ERROR, "can't set outfile param\n" );
2214         input.close_file( opt->hin );
2215         output.close_file( opt->hout, largest_pts, second_largest_pts );
2216         return -1;
2217     }
2218 
2219     /* Create a new pic */
2220-    if( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ) )
2221-    {
2222-        fprintf( stderr, "x264 [error]: malloc failed\n" );
2223-        return -1;
2224-    }
2225+    FAIL_IF_ERROR( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ), "malloc failed\n" )
2226 
2227     i_start = x264_mdate();
2228     /* ticks/frame = ticks/second / frames/second */
2229     ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num;
2230-    if( ticks_per_frame < 1 )
2231-    {
2232-        fprintf( stderr, "x264 [error]: ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame );
2233-        return -1;
2234-    }
2235+    FAIL_IF_ERROR( ticks_per_frame < 1, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame )
2236 
2237     if( !param->b_repeat_headers )
2238     {
2239@@ -1473,12 +1457,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
2240         x264_nal_t *headers;
2241         int i_nal;
2242 
2243-        if( x264_encoder_headers( h, &headers, &i_nal ) < 0 )
2244-        {
2245-            fprintf( stderr, "x264 [error]: x264_encoder_headers failed\n" );
2246-            return -1;
2247-        }
2248-
2249+        FAIL_IF_ERROR( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" )
2250         if( (i_file = output.write_headers( opt->hout, headers )) < 0 )
2251             return -1;
2252     }
2253@@ -1508,15 +1487,12 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
2254 
2255         if( pic.i_pts <= largest_pts )
2256         {
2257-            if( param->i_log_level >= X264_LOG_WARNING )
2258-            {
2259-                if( param->i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
2260-                    fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
2261+            if( cli_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
2262+                x264_cli_log( "x264", X264_LOG_WARNING, "non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
2263                              i_frame, output_pts, largest_pts * dts_compress_multiplier );
2264-                else if( pts_warning_cnt == MAX_PTS_WARNING )
2265-                    fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
2266-                pts_warning_cnt++;
2267-            }
2268+            else if( pts_warning_cnt == MAX_PTS_WARNING )
2269+                x264_cli_log( "x264", X264_LOG_WARNING, "too many nonmonotonic pts warnings, suppressing further ones\n" );
2270+            pts_warning_cnt++;
2271             pic.i_pts = largest_pts + ticks_per_frame;
2272             output_pts = pic.i_pts * dts_compress_multiplier;
2273         }
2274@@ -1573,8 +1549,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
2275         if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
2276             Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
2277     }
2278-    if( pts_warning_cnt >= MAX_PTS_WARNING && param->i_log_level < X264_LOG_DEBUG )
2279-        fprintf( stderr, "x264 [warning]: %d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
2280+    if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
2281+        x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
2282 
2283     /* duration algorithm fails when only 1 frame is output */
2284     if( i_frame_output == 1 )
2285diff --git a/x264cli.h b/x264cli.h
2286new file mode 100644
2287index 0000000..1acca56
2288--- /dev/null
2289+++ b/x264cli.h
2290@@ -0,0 +1,67 @@
2291+/*****************************************************************************
2292+ * x264cli.h: x264cli common
2293+ *****************************************************************************
2294+ * Copyright (C) 2003-2010 x264 project
2295+ *
2296+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
2297+ *          Loren Merritt <lorenm@u.washington.edu>
2298+ *
2299+ * This program is free software; you can redistribute it and/or modify
2300+ * it under the terms of the GNU General Public License as published by
2301+ * the Free Software Foundation; either version 2 of the License, or
2302+ * (at your option) any later version.
2303+ *
2304+ * This program is distributed in the hope that it will be useful,
2305+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2306+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2307+ * GNU General Public License for more details.
2308+ *
2309+ * You should have received a copy of the GNU General Public License
2310+ * along with this program; if not, write to the Free Software
2311+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
2312+ *****************************************************************************/
2313+
2314+#ifndef X264_CLI_H
2315+#define X264_CLI_H
2316+
2317+#include "common/common.h"
2318+
2319+typedef void *hnd_t;
2320+
2321+static inline int64_t gcd( int64_t a, int64_t b ) /* greatest common divisor of a and b, computed with the Euclidean algorithm */
2322+{
2323+    while( 1 )
2324+    {
2325+        int64_t c = a % b; /* b must be non-zero here: a % 0 is undefined behaviour */
2326+        if( !c )
2327+            return b; /* b divides a exactly, so b is the result */
2328+        a = b; /* otherwise continue with the pair (b, a % b) */
2329+        b = c;
2330+    }
2331+}
2332+
2333+static inline int64_t lcm( int64_t a, int64_t b ) /* least common multiple of a and b, built on gcd() */
2334+{
2335+    return ( a / gcd( a, b ) ) * b; /* divides by the gcd before multiplying, so the full product a*b is never formed */
2336+}
2337+
2338+static inline char *get_filename_extension( char *filename ) /* returns a pointer into filename itself, not a copy */
2339+{
2340+    char *ext = filename + strlen( filename ); /* start at the terminating NUL */
2341+    while( *ext != '.' && ext > filename ) /* walk backwards to the last '.' or to the start of the string */
2342+        ext--;
2343+    ext += *ext == '.'; /* step past the '.'; if no '.' was found ext is left at filename, so the whole name is returned */
2344+    return ext;
2345+}
2346+
2347+void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
2348+void x264_cli_printf( int i_level, const char *fmt, ... );
2349+
2350+#define FAIL_IF_ERR( cond, name, ... ) /* if cond is true: log the message at X264_LOG_ERROR under name, then return -1 from the enclosing function */\
2351+if( cond )\
2352+{\
2353+    x264_cli_log( name, X264_LOG_ERROR, __VA_ARGS__ );\
2354+    return -1;\
2355+} /* expands to a bare if-block (not do/while(0)): an else written directly after an invocation would bind to this if */
2356+
2357+#endif
2358-- 
23591.7.1
2360
2361
2362From f378994ab3c816aaab2b795143e31919fdee1f2d Mon Sep 17 00:00:00 2001
2363From: Jason Garrett-Glaser <darkshikari@gmail.com>
2364Date: Wed, 30 Jun 2010 13:06:22 -0700
2365Subject: [PATCH 5/7] Don't check i16x16 planar mode unless previous modes were useful
2366 Saves ~160 clocks per MB at subme=1, ~270 per MB at subme>1 (measured on Core i7).
2367 Negligible effect on compression.
2368
2369Also make a few more arrays static.
2370---
2371 encoder/analyse.c |   29 +++++++++++++++++++----------
2372 encoder/set.c     |    3 ++-
2373 2 files changed, 21 insertions(+), 11 deletions(-)
2374
2375diff --git a/encoder/analyse.c b/encoder/analyse.c
2376index 696c78f..cdbdd1e 100644
2377--- a/encoder/analyse.c
2378+++ b/encoder/analyse.c
2379@@ -646,16 +646,27 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
2380     /* 16x16 prediction selection */
2381     const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
2382 
2383+    /* Not heavily tuned */
2384+    static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
2385+    int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
2386+
2387     if( !h->mb.b_lossless && predict_mode[3] >= 0 )
2388     {
2389         h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
2390-        h->predict_16x16[I_PRED_16x16_P]( p_dst );
2391-        a->i_satd_i16x16_dir[I_PRED_16x16_P] =
2392-            h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
2393-        for( int i = 0; i < 4; i++ )
2394+        a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
2395+        a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
2396+        a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
2397+        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
2398+        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
2399+        COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
2400+
2401+        /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
2402+        if( a->i_satd_i16x16 <= i16x16_thresh )
2403         {
2404-            int cost = a->i_satd_i16x16_dir[i] += lambda * bs_size_ue(i);
2405-            COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );
2406+            h->predict_16x16[I_PRED_16x16_P]( p_dst );
2407+            a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
2408+            a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
2409+            COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
2410         }
2411     }
2412     else
2413@@ -681,9 +692,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
2414         /* cavlc mb type prefix */
2415         a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
2416 
2417-    /* Not heavily tuned */
2418-    const uint8_t i16x16_thresh[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
2419-    if( a->b_fast_intra && a->i_satd_i16x16 > (i16x16_thresh[h->mb.i_subpel_refine]*i_satd_inter)>>1 )
2420+    if( a->i_satd_i16x16 > i16x16_thresh )
2421         return;
2422 
2423     /* 8x8 prediction selection */
2424@@ -784,7 +793,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
2425             i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
2426         }
2427         /* Not heavily tuned */
2428-        const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
2429+        static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
2430         if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
2431             return;
2432     }
2433diff --git a/encoder/set.c b/encoder/set.c
2434index 8d007aa..8ea6eac 100644
2435--- a/encoder/set.c
2436+++ b/encoder/set.c
2437@@ -534,7 +534,8 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
2438 {
2439     int i;
2440     // random ID number generated according to ISO-11578
2441-    const uint8_t uuid[16] = {
2442+    static const uint8_t uuid[16] =
2443+    {
2444         0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
2445         0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
2446     };
2447-- 
24481.7.1
2449
2450
2451From 293b47bc2c52abe5143913ef3d126e6170edaf5c Mon Sep 17 00:00:00 2001
2452From: Jason Garrett-Glaser <darkshikari@gmail.com>
2453Date: Wed, 30 Jun 2010 13:55:46 -0700
2454Subject: [PATCH 6/7] Support infinite keyint (--keyint infinite).
2455 This just means x264 won't insert non-scenecut keyframes.
2456 Useful for streaming when using interactive error recovery or some other mechanism that makes keyframes unnecessary.
2457
2458Also change POC logic to limit POC/framenum LSB size (to save bits per slice).
2459Also fix a bug in the CPB underflow detection code (didn't affect the bitstream, just resulted in the failure to print certain warning messages).
2460---
2461 common/common.c       |    7 ++++---
2462 encoder/encoder.c     |    8 ++++----
2463 encoder/ratecontrol.c |   10 +++++-----
2464 encoder/set.c         |   29 +++++++++++++++++------------
2465 encoder/slicetype.c   |    2 +-
2466 x264.c                |    2 +-
2467 x264.h                |    3 ++-
2468 7 files changed, 34 insertions(+), 27 deletions(-)
2469
2470diff --git a/common/common.c b/common/common.c
2471index 8c7cf3c..14dd716 100644
2472--- a/common/common.c
2473+++ b/common/common.c
2474@@ -638,9 +638,10 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
2475         p->i_dpb_size = atoi(value);
2476     OPT("keyint")
2477     {
2478-        p->i_keyint_max = atoi(value);
2479-        if( p->i_keyint_min > p->i_keyint_max )
2480-            p->i_keyint_min = p->i_keyint_max;
2481+        if( strstr( value, "infinite" ) )
2482+            p->i_keyint_max = X264_KEYINT_MAX_INFINITE;
2483+        else
2484+            p->i_keyint_max = atoi(value);
2485     }
2486     OPT2("min-keyint", "keyint-min")
2487     {
2488diff --git a/encoder/encoder.c b/encoder/encoder.c
2489index 5cd3307..31cb84a 100644
2490--- a/encoder/encoder.c
2491+++ b/encoder/encoder.c
2492@@ -567,8 +567,7 @@ static int x264_validate_parameters( x264_t *h )
2493 
2494     h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
2495     h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
2496-    if( h->param.i_keyint_max <= 0 )
2497-        h->param.i_keyint_max = 1;
2498+    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
2499     if( h->param.i_scenecut_threshold < 0 )
2500         h->param.i_scenecut_threshold = 0;
2501     if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
2502@@ -627,9 +626,10 @@ static int x264_validate_parameters( x264_t *h )
2503     h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
2504     if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
2505         h->param.rc.b_mb_tree = 0;
2506-    if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
2507+    if( (!h->param.b_intra_refresh && h->param.i_keyint_max != X264_KEYINT_MAX_INFINITE) &&
2508+        !h->param.rc.i_lookahead && h->param.rc.b_mb_tree )
2509     {
2510-        x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
2511+        x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh or infinite keyint\n" );
2512         h->param.rc.b_mb_tree = 0;
2513     }
2514     if( h->param.rc.b_stat_read )
2515diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
2516index 1030ef2..6fdaa98 100644
2517--- a/encoder/ratecontrol.c
2518+++ b/encoder/ratecontrol.c
2519@@ -492,13 +492,13 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
2520             // arbitrary
2521             #define MAX_DURATION 0.5
2522 
2523-            int max_cpb_output_delay = h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
2524+            int max_cpb_output_delay = X264_MIN( h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick, INT_MAX );
2525             int max_dpb_output_delay = h->sps->vui.i_max_dec_frame_buffering * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
2526             int max_delay = (int)(90000.0 * (double)h->sps->vui.hrd.i_cpb_size_unscaled / h->sps->vui.hrd.i_bit_rate_unscaled + 0.5);
2527 
2528             h->sps->vui.hrd.i_initial_cpb_removal_delay_length = 2 + x264_clip3( 32 - x264_clz( max_delay ), 4, 22 );
2529-            h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 32 );
2530-            h->sps->vui.hrd.i_dpb_output_delay_length  = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 32 );
2531+            h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 31 );
2532+            h->sps->vui.hrd.i_dpb_output_delay_length  = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 31 );
2533 
2534             #undef MAX_DURATION
2535 
2536@@ -1781,10 +1781,10 @@ void x264_hrd_fullness( x264_t *h )
2537     uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
2538     uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
2539 
2540-    if( cpb_state < 0 || cpb_state > cpb_size )
2541+    if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > cpb_size )
2542     {
2543          x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
2544-                   cpb_state < 0 ? "underflow" : "overflow", (float)cpb_state/denom, (float)cpb_size/denom );
2545+                   rct->buffer_fill_final < 0 ? "underflow" : "overflow", (float)rct->buffer_fill_final/denom, (float)cpb_size/denom );
2546     }
2547 
2548     h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom);
2549diff --git a/encoder/set.c b/encoder/set.c
2550index 8ea6eac..9e6e736 100644
2551--- a/encoder/set.c
2552+++ b/encoder/set.c
2553@@ -99,6 +99,7 @@ static void x264_sei_write( bs_t *s, uint8_t *p_start )
2554 void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
2555 {
2556     sps->i_id = i_id;
2557+    int max_frame_num;
2558 
2559     sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
2560     if( sps->b_qpprime_y_zero_transform_bypass )
2561@@ -118,15 +119,27 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
2562     /* Never set constraint_set2, it is not necessary and not used in real world. */
2563     sps->b_constraint_set2  = 0;
2564 
2565-    sps->i_log2_max_frame_num = 4;  /* at least 4 */
2566-    while( (1 << sps->i_log2_max_frame_num) <= param->i_keyint_max && sps->i_log2_max_frame_num < 10 )
2567+    sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
2568+    /* extra slot with pyramid so that we don't have to override the
2569+     * order of forgetting old pictures */
2570+    sps->vui.i_max_dec_frame_buffering =
2571+    sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
2572+                            param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
2573+    sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
2574+
2575+    /* number of refs + current frame */
2576+    max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
2577+    sps->i_log2_max_frame_num = 4;
2578+    while( (1 << sps->i_log2_max_frame_num) <= max_frame_num )
2579         sps->i_log2_max_frame_num++;
2580-    sps->i_log2_max_frame_num++;
2581 
2582     sps->i_poc_type = 0;
2583     if( sps->i_poc_type == 0 )
2584     {
2585-        sps->i_log2_max_poc_lsb = sps->i_log2_max_frame_num + 1;    /* max poc = 2*frame_num */
2586+        int max_delta_poc = (param->i_bframe + 2) * (!!param->i_bframe_pyramid + 1) * 2;
2587+        sps->i_log2_max_poc_lsb = 4;
2588+        while( (1 << sps->i_log2_max_poc_lsb) <= max_delta_poc * 2 )
2589+            sps->i_log2_max_poc_lsb++;
2590     }
2591     else if( sps->i_poc_type == 1 )
2592     {
2593@@ -219,14 +232,6 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
2594 
2595     // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
2596 
2597-    sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
2598-    /* extra slot with pyramid so that we don't have to override the
2599-     * order of forgetting old pictures */
2600-    sps->vui.i_max_dec_frame_buffering =
2601-    sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
2602-                            param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
2603-    sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
2604-
2605     sps->vui.b_bitstream_restriction = 1;
2606     if( sps->vui.b_bitstream_restriction )
2607     {
2608diff --git a/encoder/slicetype.c b/encoder/slicetype.c
2609index 4ede8cf..7d69b71 100644
2610--- a/encoder/slicetype.c
2611+++ b/encoder/slicetype.c
2612@@ -1009,7 +1009,7 @@ static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **f
2613     float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
2614     /* magic numbers pulled out of thin air */
2615     float f_thresh_min = f_thresh_max * h->param.i_keyint_min
2616-                         / ( h->param.i_keyint_max * 4 );
2617+                         / ( h->param.i_keyint_max * 4. );
2618     int res;
2619 
2620     if( h->param.i_keyint_min == h->param.i_keyint_max )
2621diff --git a/x264.c b/x264.c
2622index 741570c..0bede93 100644
2623--- a/x264.c
2624+++ b/x264.c
2625@@ -409,7 +409,7 @@ static void Help( x264_param_t *defaults, int longhelp )
2626     H0( "\n" );
2627     H0( "Frame-type options:\n" );
2628     H0( "\n" );
2629-    H0( "  -I, --keyint <integer>      Maximum GOP size [%d]\n", defaults->i_keyint_max );
2630+    H0( "  -I, --keyint <integer or \"infinite\"> Maximum GOP size [%d]\n", defaults->i_keyint_max );
2631     H2( "  -i, --min-keyint <integer>  Minimum GOP size [auto]\n" );
2632     H2( "      --no-scenecut           Disable adaptive I-frame decision\n" );
2633     H2( "      --scenecut <integer>    How aggressively to insert extra I-frames [%d]\n", defaults->i_scenecut_threshold );
2634diff --git a/x264.h b/x264.h
2635index 86f7426..097365a 100644
2636--- a/x264.h
2637+++ b/x264.h
2638@@ -35,7 +35,7 @@
2639 
2640 #include <stdarg.h>
2641 
2642-#define X264_BUILD 101
2643+#define X264_BUILD 102
2644 
2645 /* x264_t:
2646  *      opaque handler for encoder */
2647@@ -152,6 +152,7 @@ typedef struct
2648 #define X264_B_PYRAMID_STRICT        1
2649 #define X264_B_PYRAMID_NORMAL        2
2650 #define X264_KEYINT_MIN_AUTO         0
2651+#define X264_KEYINT_MAX_INFINITE     (1<<30)
2652 #define X264_OPEN_GOP_NONE           0
2653 #define X264_OPEN_GOP_NORMAL         1
2654 #define X264_OPEN_GOP_BLURAY         2
2655-- 
26561.7.1
2657
2658
2659From 5dbafc14927d507544edc7841b126a5fa3840e68 Mon Sep 17 00:00:00 2001
2660From: Oskar Arvidsson <oskar@irock.se>
2661Date: Fri, 2 Jul 2010 04:06:08 +0200
2662Subject: [PATCH 7/7] Support for 9 and 10-bit encoding
2663 Output bit depth is specified at compilation time via --bit-depth.
2664 There is currently almost no assembly code available for high-bit-depth modes, so encoding will be very slow.
2665 Input is still 8-bit only; this will change in the future.
2666
2667Note that very few H.264 decoders support >8 bit depth currently.
2668---
2669 common/arm/mc-c.c      |   42 +++++++-----
2670 common/arm/predict-c.c |    8 ++
2671 common/bitstream.h     |    2 +-
2672 common/common.c        |   17 ++++-
2673 common/common.h        |   41 +++++++----
2674 common/dct.c           |   15 +++-
2675 common/deblock.c       |   43 ++++++++----
2676 common/macroblock.c    |    2 +-
2677 common/macroblock.h    |   66 ++++++++++++-------
2678 common/mc.c            |   33 ++++-----
2679 common/mc.h            |    2 +-
2680 common/pixel.c         |   14 +++-
2681 common/ppc/dct.c       |    2 +
2682 common/ppc/deblock.c   |    2 +
2683 common/ppc/mc.c        |    4 +
2684 common/ppc/pixel.c     |    4 +
2685 common/ppc/predict.c   |    6 ++
2686 common/ppc/quant.c     |    2 +
2687 common/predict.c       |   63 ++++++++++--------
2688 common/quant.c         |   14 ++---
2689 common/set.c           |   25 ++++++-
2690 common/x86/mc-c.c      |   12 +++-
2691 common/x86/predict-c.c |   10 +++
2692 configure              |   17 +++++
2693 encoder/analyse.c      |   80 ++++++++++++-----------
2694 encoder/cabac.c        |   25 ++++---
2695 encoder/cavlc.c        |   24 +++----
2696 encoder/encoder.c      |   45 +++++++++----
2697 encoder/macroblock.h   |    4 +-
2698 encoder/me.h           |    2 +-
2699 encoder/ratecontrol.c  |   49 ++++++++------
2700 encoder/rdo.c          |   10 +--
2701 encoder/set.c          |   11 ++-
2702 encoder/slicetype.c    |   10 ++--
2703 tools/checkasm.c       |  169 +++++++++++++++++++++++++----------------------
2704 x264.c                 |   22 ++++--
2705 x264.h                 |    4 +-
2706 37 files changed, 547 insertions(+), 354 deletions(-)
2707
2708diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
2709index d294eff..b1106dd 100644
2710--- a/common/arm/mc-c.c
2711+++ b/common/arm/mc-c.c
2712@@ -64,6 +64,19 @@ MC_WEIGHT(_nodenom)
2713 MC_WEIGHT(_offsetadd)
2714 MC_WEIGHT(_offsetsub)
2715 
2716+void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
2717+void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
2718+void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
2719+void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
2720+
2721+void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
2722+void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
2723+
2724+void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
2725+void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
2726+void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
2727+
2728+#if !X264_HIGH_BIT_DEPTH
2729 static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
2730 {
2731     if( w->i_scale == 1<<w->i_denom )
2732@@ -85,14 +98,6 @@ static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
2733         w->weightfn = x264_mc_wtab_neon;
2734 }
2735 
2736-void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
2737-void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
2738-void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
2739-void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
2740-
2741-void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
2742-void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
2743-
2744 static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
2745 {
2746     NULL,
2747@@ -174,10 +179,6 @@ static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
2748     }
2749 }
2750 
2751-void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
2752-void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
2753-void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
2754-
2755 static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
2756                               int stride, int width, int height, int16_t *buf )
2757 {
2758@@ -198,18 +199,22 @@ static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8
2759         src  += stride;
2760     }
2761 }
2762+#endif // !X264_HIGH_BIT_DEPTH
2763 
2764 void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
2765 {
2766     if( !(cpu&X264_CPU_ARMV6) )
2767         return;
2768 
2769+#if !X264_HIGH_BIT_DEPTH
2770     pf->prefetch_fenc = x264_prefetch_fenc_arm;
2771     pf->prefetch_ref  = x264_prefetch_ref_arm;
2772+#endif // !X264_HIGH_BIT_DEPTH
2773 
2774     if( !(cpu&X264_CPU_NEON) )
2775         return;
2776 
2777+#if !X264_HIGH_BIT_DEPTH
2778     pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
2779     pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
2780     pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_neon;
2781@@ -229,15 +234,16 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
2782     pf->offsetsub = x264_mc_offsetsub_wtab_neon;
2783     pf->weight_cache = x264_weight_cache_neon;
2784 
2785-// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
2786-#ifndef SYS_MACOSX
2787-    pf->memcpy_aligned  = x264_memcpy_aligned_neon;
2788-#endif
2789-    pf->memzero_aligned = x264_memzero_aligned_neon;
2790-
2791     pf->mc_chroma = x264_mc_chroma_neon;
2792     pf->mc_luma = mc_luma_neon;
2793     pf->get_ref = get_ref_neon;
2794     pf->hpel_filter = hpel_filter_neon;
2795     pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
2796+#endif // !X264_HIGH_BIT_DEPTH
2797+
2798+// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
2799+#ifndef SYS_MACOSX
2800+    pf->memcpy_aligned  = x264_memcpy_aligned_neon;
2801+#endif
2802+    pf->memzero_aligned = x264_memzero_aligned_neon;
2803 }
2804diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
2805index fa7b9f7..b40dc9a 100644
2806--- a/common/arm/predict-c.c
2807+++ b/common/arm/predict-c.c
2808@@ -51,6 +51,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
2809     if (!(cpu&X264_CPU_ARMV6))
2810         return;
2811 
2812+#if !X264_HIGH_BIT_DEPTH
2813     pf[I_PRED_4x4_H]   = x264_predict_4x4_h_armv6;
2814     pf[I_PRED_4x4_DC]  = x264_predict_4x4_dc_armv6;
2815     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_armv6;
2816@@ -59,6 +60,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
2817         return;
2818 
2819     pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_neon;
2820+#endif // !X264_HIGH_BIT_DEPTH
2821 }
2822 
2823 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
2824@@ -66,12 +68,14 @@ void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
2825     if (!(cpu&X264_CPU_NEON))
2826         return;
2827 
2828+#if !X264_HIGH_BIT_DEPTH
2829     pf[I_PRED_CHROMA_DC]      = x264_predict_8x8c_dc_neon;
2830     pf[I_PRED_CHROMA_DC_TOP]  = x264_predict_8x8c_dc_top_neon;
2831     pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_neon;
2832     pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_neon;
2833     pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_neon;
2834     pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_neon;
2835+#endif // !X264_HIGH_BIT_DEPTH
2836 }
2837 
2838 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
2839@@ -79,8 +83,10 @@ void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_
2840     if (!(cpu&X264_CPU_NEON))
2841         return;
2842 
2843+#if !X264_HIGH_BIT_DEPTH
2844     pf[I_PRED_8x8_DC]  = x264_predict_8x8_dc_neon;
2845     pf[I_PRED_8x8_H]   = x264_predict_8x8_h_neon;
2846+#endif // !X264_HIGH_BIT_DEPTH
2847 }
2848 
2849 void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
2850@@ -88,10 +94,12 @@ void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
2851     if (!(cpu&X264_CPU_NEON))
2852         return;
2853 
2854+#if !X264_HIGH_BIT_DEPTH
2855     pf[I_PRED_16x16_DC ]    = x264_predict_16x16_dc_neon;
2856     pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_neon;
2857     pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_neon;
2858     pf[I_PRED_16x16_H ]     = x264_predict_16x16_h_neon;
2859     pf[I_PRED_16x16_V ]     = x264_predict_16x16_v_neon;
2860     pf[I_PRED_16x16_P ]     = x264_predict_16x16_p_neon;
2861+#endif // !X264_HIGH_BIT_DEPTH
2862 }
2863diff --git a/common/bitstream.h b/common/bitstream.h
2864index dd8118d..318c790 100644
2865--- a/common/bitstream.h
2866+++ b/common/bitstream.h
2867@@ -53,7 +53,7 @@ typedef struct bs_s
2868 typedef struct
2869 {
2870     int     last;
2871-    int16_t level[16];
2872+    dctcoef level[16];
2873     uint8_t run[16];
2874 } x264_run_level_t;
2875 
2876diff --git a/common/common.c b/common/common.c
2877index 14dd716..728dfab 100644
2878--- a/common/common.c
2879+++ b/common/common.c
2880@@ -91,10 +91,10 @@ void x264_param_default( x264_param_t *param )
2881     param->rc.i_vbv_max_bitrate = 0;
2882     param->rc.i_vbv_buffer_size = 0;
2883     param->rc.f_vbv_buffer_init = 0.9;
2884-    param->rc.i_qp_constant = 23;
2885-    param->rc.f_rf_constant = 23;
2886+    param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
2887+    param->rc.f_rf_constant = 23 + QP_BD_OFFSET;
2888     param->rc.i_qp_min = 10;
2889-    param->rc.i_qp_max = 51;
2890+    param->rc.i_qp_max = QP_MAX;
2891     param->rc.i_qp_step = 4;
2892     param->rc.f_ip_factor = 1.4;
2893     param->rc.f_pb_factor = 1.3;
2894@@ -418,6 +418,15 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
2895     if( !profile )
2896         return 0;
2897 
2898+#if BIT_DEPTH > 8
2899+    if( !strcasecmp( profile, "baseline" ) || !strcasecmp( profile, "main" ) ||
2900+        !strcasecmp( profile, "high" ) )
2901+    {
2902+        x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d.\n", profile, BIT_DEPTH );
2903+        return -1;
2904+    }
2905+#endif
2906+
2907     if( !strcasecmp( profile, "baseline" ) )
2908     {
2909         param->analyse.b_transform_8x8 = 0;
2910@@ -441,7 +450,7 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
2911         param->analyse.b_transform_8x8 = 0;
2912         param->i_cqm_preset = X264_CQM_FLAT;
2913     }
2914-    else if( !strcasecmp( profile, "high" ) )
2915+    else if( !strcasecmp( profile, "high" ) || !strcasecmp( profile, "high10" ) )
2916     {
2917         /* Default */
2918     }
2919diff --git a/common/common.h b/common/common.h
2920index 7b60811..ca27968 100644
2921--- a/common/common.h
2922+++ b/common/common.h
2923@@ -52,10 +52,15 @@ do {\
2924 
2925 #define X264_BFRAME_MAX 16
2926 #define X264_THREAD_MAX 128
2927-#define X264_PCM_COST (386*8)
2928+#define X264_PCM_COST (384*BIT_DEPTH+16)
2929 #define X264_LOOKAHEAD_MAX 250
2930+#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
2931+#define QP_MAX (51+QP_BD_OFFSET)
2932+#define QP_MAX_MAX (51+2*6)
2933+#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
2934+#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
2935 // arbitrary, but low because SATD scores are 1/4 normal
2936-#define X264_LOOKAHEAD_QP 12
2937+#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
2938 
2939 // number of pixels (per thread) in progress at any given time.
2940 // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
2941@@ -101,17 +106,23 @@ typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; u
2942 #define CP64(dst,src) M64(dst) = M64(src)
2943 #define CP128(dst,src) M128(dst) = M128(src)
2944 
2945-typedef uint8_t pixel;
2946-typedef uint32_t pixel4;
2947-typedef int16_t dctcoef;
2948+#if X264_HIGH_BIT_DEPTH
2949+    typedef uint16_t pixel;
2950+    typedef uint64_t pixel4;
2951+    typedef int32_t  dctcoef;
2952 
2953-#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
2954-#define MPIXEL_X4(src) M32(src)
2955-#define CPPIXEL_X4(dst,src) CP32(dst,src)
2956-#define CPPIXEL_X8(dst,src) CP64(dst,src)
2957-#define MDCT_X2(dct) M32(dct)
2958-#define CPDCT_X2(dst,src) CP32(dst,src)
2959-#define CPDCT_X4(dst,src) CP64(dst,src)
2960+#   define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
2961+#   define MPIXEL_X4(src) M64(src)
2962+#else
2963+    typedef uint8_t  pixel;
2964+    typedef uint32_t pixel4;
2965+    typedef int16_t  dctcoef;
2966+
2967+#   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
2968+#   define MPIXEL_X4(src) M32(src)
2969+#endif
2970+
2971+#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
2972 
2973 #define X264_SCAN8_SIZE (6*8)
2974 #define X264_SCAN8_LUMA_SIZE (5*8)
2975@@ -189,7 +200,7 @@ void x264_init_vlc_tables();
2976 
2977 static ALWAYS_INLINE pixel x264_clip_pixel( int x )
2978 {
2979-    return x&(~255) ? (-x)>>31 : x;
2980+    return ( (x & ~PIXEL_MAX) ? (-x)>>31 & PIXEL_MAX : x );
2981 }
2982 
2983 static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
2984@@ -449,8 +460,8 @@ struct x264_t
2985     /* mv/ref cost arrays.  Indexed by lambda instead of
2986      * qp because, due to rounding, some quantizers share
2987      * lambdas.  This saves memory. */
2988-    uint16_t *cost_mv[92];
2989-    uint16_t *cost_mv_fpel[92][4];
2990+    uint16_t *cost_mv[LAMBDA_MAX+1];
2991+    uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
2992 
2993     const uint8_t   *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
2994 
2995diff --git a/common/dct.c b/common/dct.c
2996index 60dbd55..cd27363 100644
2997--- a/common/dct.c
2998+++ b/common/dct.c
2999@@ -418,6 +418,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
3000     dctf->dct4x4dc  = dct4x4dc;
3001     dctf->idct4x4dc = idct4x4dc;
3002 
3003+#if !X264_HIGH_BIT_DEPTH
3004 #if HAVE_MMX
3005     if( cpu&X264_CPU_MMX )
3006     {
3007@@ -515,6 +516,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
3008         dctf->add16x16_idct8= x264_add16x16_idct8_neon;
3009     }
3010 #endif
3011+#endif // !X264_HIGH_BIT_DEPTH
3012 }
3013 
3014 void x264_dct_init_weights( void )
3015@@ -599,11 +601,9 @@ static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] )
3016 
3017 static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
3018 {
3019-    CPDCT_X2( level, dct );
3020+    memcpy( level, dct, 2 * sizeof(dctcoef) );
3021     ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
3022-    CPDCT_X2( level+6, dct+6 );
3023-    CPDCT_X4( level+8, dct+8 );
3024-    CPDCT_X4( level+12, dct+12 );
3025+    memcpy( level+6, dct+6, 10 * sizeof(dctcoef) );
3026 }
3027 
3028 #undef ZIG
3029@@ -618,6 +618,7 @@ static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
3030     CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
3031     CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
3032     CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
3033+#define CPPIXEL_X8(dst,src) ( CPPIXEL_X4(dst,src), CPPIXEL_X4(dst+4,src+4) )
3034 #define COPY8x8\
3035     CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
3036     CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
3037@@ -709,6 +710,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3038         pf->sub_8x8    = zigzag_sub_8x8_field;
3039         pf->sub_4x4    = zigzag_sub_4x4_field;
3040         pf->sub_4x4ac  = zigzag_sub_4x4ac_field;
3041+#if !X264_HIGH_BIT_DEPTH
3042 #if HAVE_MMX
3043         if( cpu&X264_CPU_MMXEXT )
3044         {
3045@@ -726,6 +728,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3046         if( cpu&X264_CPU_ALTIVEC )
3047             pf->scan_4x4   = x264_zigzag_scan_4x4_field_altivec;
3048 #endif
3049+#endif // !X264_HIGH_BIT_DEPTH
3050     }
3051     else
3052     {
3053@@ -734,6 +737,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3054         pf->sub_8x8    = zigzag_sub_8x8_frame;
3055         pf->sub_4x4    = zigzag_sub_4x4_frame;
3056         pf->sub_4x4ac  = zigzag_sub_4x4ac_frame;
3057+#if !X264_HIGH_BIT_DEPTH
3058 #if HAVE_MMX
3059         if( cpu&X264_CPU_MMX )
3060             pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
3061@@ -759,13 +763,16 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3062         if( cpu&X264_CPU_NEON )
3063             pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
3064 #endif
3065+#endif // !X264_HIGH_BIT_DEPTH
3066     }
3067 
3068     pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
3069+#if !X264_HIGH_BIT_DEPTH
3070 #if HAVE_MMX
3071     if( cpu&X264_CPU_MMX )
3072         pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
3073     if( cpu&X264_CPU_SHUFFLE_IS_FAST )
3074         pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
3075 #endif
3076+#endif // !X264_HIGH_BIT_DEPTH
3077 }
3078diff --git a/common/deblock.c b/common/deblock.c
3079index db9c95d..040e943 100644
3080--- a/common/deblock.c
3081+++ b/common/deblock.c
3082@@ -25,9 +25,10 @@
3083 #include "common.h"
3084 
3085 /* Deblocking filter */
3086-static const uint8_t i_alpha_table[52+12*2] =
3087+static const uint8_t i_alpha_table[52+12*3] =
3088 {
3089      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
3090+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
3091      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
3092      0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
3093      7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
3094@@ -36,8 +37,9 @@ static const uint8_t i_alpha_table[52+12*2] =
3095    255,255,
3096    255,255,255,255,255,255,255,255,255,255,255,255,
3097 };
3098-static const uint8_t i_beta_table[52+12*2] =
3099+static const uint8_t i_beta_table[52+12*3] =
3100 {
3101+    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
3102      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
3103      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
3104      0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
3105@@ -47,12 +49,14 @@ static const uint8_t i_beta_table[52+12*2] =
3106     18, 18,
3107     18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
3108 };
3109-static const int8_t i_tc0_table[52+12*2][4] =
3110+static const int8_t i_tc0_table[52+12*3][4] =
3111 {
3112     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
3113     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
3114     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
3115     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
3116+    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
3117+    {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
3118     {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
3119     {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
3120     {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
3121@@ -63,9 +67,9 @@ static const int8_t i_tc0_table[52+12*2][4] =
3122     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
3123     {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
3124 };
3125-#define alpha_table(x) i_alpha_table[(x)+12]
3126-#define beta_table(x)  i_beta_table[(x)+12]
3127-#define tc0_table(x)   i_tc0_table[(x)+12]
3128+#define alpha_table(x) i_alpha_table[(x)+24]
3129+#define beta_table(x)  i_beta_table[(x)+24]
3130+#define tc0_table(x)   i_tc0_table[(x)+24]
3131 
3132 /* From ffmpeg */
3133 static inline void deblock_luma_c( pixel *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0 )
3134@@ -265,18 +269,19 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
3135 
3136 static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
3137 {
3138-    int index_a = i_qp + h->sh.i_alpha_c0_offset;
3139-    int alpha = alpha_table(index_a);
3140-    int beta  = beta_table(i_qp + h->sh.i_beta_offset);
3141+    int index_a = i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset;
3142+    int index_b = i_qp-QP_BD_OFFSET + h->sh.i_beta_offset;
3143+    int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
3144+    int beta  = beta_table(index_b) << (BIT_DEPTH-8);
3145     int8_t tc[4];
3146 
3147     if( !M32(bS) || !alpha || !beta )
3148         return;
3149 
3150-    tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
3151-    tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
3152-    tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
3153-    tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
3154+    tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
3155+    tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
3156+    tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
3157+    tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
3158 
3159     pf_inter( pix1, i_stride, alpha, beta, tc );
3160     if( b_chroma )
3161@@ -285,8 +290,10 @@ static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stri
3162 
3163 static inline void deblock_edge_intra( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
3164 {
3165-    int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
3166-    int beta  = beta_table(i_qp + h->sh.i_beta_offset);
3167+    int index_a = i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset;
3168+    int index_b = i_qp-QP_BD_OFFSET + h->sh.i_beta_offset;
3169+    int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
3170+    int beta  = beta_table(index_b) << (BIT_DEPTH-8);
3171 
3172     if( !alpha || !beta )
3173         return;
3174@@ -450,6 +457,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3175 #if HAVE_MMX
3176     if( cpu&X264_CPU_MMXEXT )
3177     {
3178+#if !X264_HIGH_BIT_DEPTH
3179         pf->deblock_chroma[1] = x264_deblock_v_chroma_mmxext;
3180         pf->deblock_chroma[0] = x264_deblock_h_chroma_mmxext;
3181         pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmxext;
3182@@ -460,10 +468,12 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3183         pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmxext;
3184         pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmxext;
3185 #endif
3186+#endif // !X264_HIGH_BIT_DEPTH
3187         pf->deblock_strength = x264_deblock_strength_mmxext;
3188         if( cpu&X264_CPU_SSE2 )
3189         {
3190             pf->deblock_strength = x264_deblock_strength_sse2;
3191+#if !X264_HIGH_BIT_DEPTH
3192             if( !(cpu&X264_CPU_STACK_MOD4) )
3193             {
3194                 pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
3195@@ -471,12 +481,14 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3196                 pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
3197                 pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
3198             }
3199+#endif // !X264_HIGH_BIT_DEPTH
3200         }
3201         if( cpu&X264_CPU_SSSE3 )
3202             pf->deblock_strength = x264_deblock_strength_ssse3;
3203     }
3204 #endif
3205 
3206+#if !X264_HIGH_BIT_DEPTH
3207 #if HAVE_ALTIVEC
3208     if( cpu&X264_CPU_ALTIVEC )
3209     {
3210@@ -494,4 +506,5 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3211         pf->deblock_chroma[0] = x264_deblock_h_chroma_neon;
3212    }
3213 #endif
3214+#endif // !X264_HIGH_BIT_DEPTH
3215 }
3216diff --git a/common/macroblock.c b/common/macroblock.c
3217index 4561d8a..f0a624f 100644
3218--- a/common/macroblock.c
3219+++ b/common/macroblock.c
3220@@ -337,7 +337,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
3221     int scratch_size = 0;
3222     if( !b_lookahead )
3223     {
3224-        int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(int16_t);
3225+        int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(dctcoef);
3226         int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
3227         int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
3228         int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
3229diff --git a/common/macroblock.h b/common/macroblock.h
3230index 1a4992f..e09cd55 100644
3231--- a/common/macroblock.h
3232+++ b/common/macroblock.h
3233@@ -238,17 +238,30 @@ static const uint16_t block_idx_xy_fdec[16] =
3234     2*4 + 3*4*FDEC_STRIDE, 3*4 + 3*4*FDEC_STRIDE
3235 };
3236 
3237-static const uint8_t i_chroma_qp_table[52+12*2] =
3238+#define QP(qP) ( (qP)+QP_BD_OFFSET )
3239+static const uint8_t i_chroma_qp_table[QP_MAX+1+12*2] =
3240 {
3241-     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
3242-     0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
3243-    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3244-    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
3245-    29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
3246-    36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
3247-    39, 39,
3248-    39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
3249+         0,      0,      0,      0,      0,      0,
3250+         0,      0,      0,      0,      0,      0,
3251+#if BIT_DEPTH > 9
3252+   QP(-12),QP(-11),QP(-10), QP(-9), QP(-8), QP(-7),
3253+#endif
3254+#if BIT_DEPTH > 8
3255+    QP(-6), QP(-5), QP(-4), QP(-3), QP(-2), QP(-1),
3256+#endif
3257+     QP(0),  QP(1),  QP(2),  QP(3),  QP(4),  QP(5),
3258+     QP(6),  QP(7),  QP(8),  QP(9), QP(10), QP(11),
3259+    QP(12), QP(13), QP(14), QP(15), QP(16), QP(17),
3260+    QP(18), QP(19), QP(20), QP(21), QP(22), QP(23),
3261+    QP(24), QP(25), QP(26), QP(27), QP(28), QP(29),
3262+    QP(29), QP(30), QP(31), QP(32), QP(32), QP(33),
3263+    QP(34), QP(34), QP(35), QP(35), QP(36), QP(36),
3264+    QP(37), QP(37), QP(37), QP(38), QP(38), QP(38),
3265+    QP(39), QP(39), QP(39), QP(39),
3266+    QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
3267+    QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
3268 };
3269+#undef QP
3270 
3271 enum cabac_ctx_block_cat_e
3272 {
3273@@ -340,26 +353,31 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
3274    return (a&0xFFFF) + (b<<16);
3275 #endif
3276 }
3277+static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
3278+{
3279+#ifdef WORDS_BIGENDIAN
3280+   return b + ((uint64_t)a<<32);
3281+#else
3282+   return a + ((uint64_t)b<<32);
3283+#endif
3284+}
3285 
3286-#define pack_pixel_1to2 pack8to16
3287-#define pack_pixel_2to4 pack16to32
3288+#if X264_HIGH_BIT_DEPTH
3289+#   define pack_pixel_1to2 pack16to32
3290+#   define pack_pixel_2to4 pack32to64
3291+#else
3292+#   define pack_pixel_1to2 pack8to16
3293+#   define pack_pixel_2to4 pack16to32
3294+#endif
3295 
3296-#define array_non_zero(a) array_non_zero_int(a, sizeof(a))
3297+#define array_non_zero(a) array_non_zero_int(a, sizeof(a)/sizeof(dctcoef))
3298 #define array_non_zero_int array_non_zero_int
3299 static ALWAYS_INLINE int array_non_zero_int( dctcoef *v, int i_count )
3300 {
3301-    if(i_count == 8)
3302-        return !!M64( &v[0] );
3303-    else if(i_count == 16)
3304-        return !!(M64( &v[0] ) | M64( &v[4] ));
3305-    else if(i_count == 32)
3306-        return !!(M64( &v[0] ) | M64( &v[4] ) | M64( &v[8] ) | M64( &v[12] ));
3307-    else
3308-    {
3309-        for( int i = 0; i < i_count; i+=4 )
3310-            if( M64( &v[i] ) ) return 1;
3311-        return 0;
3312-    }
3313+    for( int i = 0; i < i_count; i++ )
3314+        if( v[i] )
3315+            return 1;
3316+    return 0;
3317 }
3318 static ALWAYS_INLINE int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
3319 {
3320diff --git a/common/mc.c b/common/mc.c
3321index 9776bec..5ef0682 100644
3322--- a/common/mc.c
3323+++ b/common/mc.c
3324@@ -117,11 +117,14 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w )
3325 {
3326     w->weightfn = h->mc.weight;
3327 }
3328-#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
3329-#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * weight->i_scale + weight->i_offset )
3330-static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
3331+#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
3332+#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
3333+static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
3334 {
3335-    if( weight->i_denom >= 1 )
3336+    int offset = weight->i_offset << (BIT_DEPTH-8);
3337+    int scale = weight->i_scale;
3338+    int denom = weight->i_denom;
3339+    if( denom >= 1 )
3340     {
3341         for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
3342             for( int x = 0; x < i_width; x++ )
3343@@ -135,21 +138,10 @@ static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_sr
3344     }
3345 }
3346 
3347-#define MC_WEIGHT_C( name, lx ) \
3348+#define MC_WEIGHT_C( name, width ) \
3349     static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \
3350 { \
3351-    if( weight->i_denom >= 1 ) \
3352-    { \
3353-        for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
3354-            for( int x = 0; x < lx; x++ ) \
3355-                opscale( x ); \
3356-    } \
3357-    else \
3358-    { \
3359-        for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
3360-            for( int x = 0; x < lx; x++ ) \
3361-                opscale_noden( x ); \
3362-    } \
3363+    mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
3364 }
3365 
3366 MC_WEIGHT_C( mc_weight_w20, 20 )
3367@@ -182,7 +174,7 @@ static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride,
3368 
3369 #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
3370 static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
3371-                         int stride, int width, int height, int16_t *buf )
3372+                         int stride, int width, int height, dctcoef *buf )
3373 {
3374     for( int y = 0; y < height; y++ )
3375     {
3376@@ -301,7 +293,12 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
3377 {
3378     while( h-- )
3379     {
3380+#if X264_HIGH_BIT_DEPTH
3381+        for( int i = 0; i < w; i++ )
3382+            dst[i] = src[i] << (BIT_DEPTH-8);
3383+#else
3384         memcpy( dst, src, w );
3385+#endif
3386         dst += i_dst;
3387         src += i_src;
3388     }
3389diff --git a/common/mc.h b/common/mc.h
3390index bb16d13..cbdf1a6 100644
3391--- a/common/mc.h
3392+++ b/common/mc.h
3393@@ -82,7 +82,7 @@ typedef struct
3394                         uint8_t *src, int i_src, int w, int h);
3395 
3396     void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
3397-                         int i_stride, int i_width, int i_height, int16_t *buf );
3398+                         int i_stride, int i_width, int i_height, dctcoef *buf );
3399 
3400     /* prefetch the next few macroblocks of fenc or fdec */
3401     void (*prefetch_fenc)( pixel *pix_y, int stride_y,
3402diff --git a/common/pixel.c b/common/pixel.c
3403index 8441c7a..069589f 100644
3404--- a/common/pixel.c
3405+++ b/common/pixel.c
3406@@ -177,7 +177,7 @@ static int pixel_var2_8x8( pixel *pix1, int i_stride1, pixel *pix2, int i_stride
3407         pix2 += i_stride2;
3408     }
3409     sum = abs(sum);
3410-    var = sqr - (sum * sum >> 6);
3411+    var = sqr - ((uint64_t)sum * sum >> 6);
3412     *ssd = sqr;
3413     return var;
3414 }
3415@@ -406,12 +406,14 @@ SAD_X( 8x4 )
3416 SAD_X( 4x8 )
3417 SAD_X( 4x4 )
3418 
3419+#if !X264_HIGH_BIT_DEPTH
3420 #if ARCH_UltraSparc
3421 SAD_X( 16x16_vis )
3422 SAD_X( 16x8_vis )
3423 SAD_X( 8x16_vis )
3424 SAD_X( 8x8_vis )
3425 #endif
3426+#endif // !X264_HIGH_BIT_DEPTH
3427 
3428 /****************************************************************************
3429  * pixel_satd_x4
3430@@ -444,6 +446,7 @@ SATD_X_DECL6( cpu )\
3431 SATD_X( 4x4, cpu )
3432 
3433 SATD_X_DECL7()
3434+#if !X264_HIGH_BIT_DEPTH
3435 #if HAVE_MMX
3436 SATD_X_DECL7( _mmxext )
3437 SATD_X_DECL6( _sse2 )
3438@@ -454,6 +457,7 @@ SATD_X_DECL7( _sse4 )
3439 #if HAVE_ARMV6
3440 SATD_X_DECL7( _neon )
3441 #endif
3442+#endif // !X264_HIGH_BIT_DEPTH
3443 
3444 #define INTRA_MBCMP_8x8( mbcmp )\
3445 void x264_intra_##mbcmp##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
3446@@ -520,8 +524,8 @@ static void ssim_4x4x2_core( const pixel *pix1, int stride1,
3447 
3448 static float ssim_end1( int s1, int s2, int ss, int s12 )
3449 {
3450-    static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
3451-    static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
3452+    static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
3453+    static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
3454     int vars = ss*64 - s1*s1 - s2*s2;
3455     int covar = s12*64 - s1*s2;
3456     return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
3457@@ -678,6 +682,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
3458     pixf->intra_sad_x3_16x16  = x264_intra_sad_x3_16x16;
3459     pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
3460 
3461+#if !X264_HIGH_BIT_DEPTH
3462 #if HAVE_MMX
3463     if( cpu&X264_CPU_MMX )
3464     {
3465@@ -903,17 +908,20 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
3466         }
3467     }
3468 #endif
3469+#endif // !X264_HIGH_BIT_DEPTH
3470 #if HAVE_ALTIVEC
3471     if( cpu&X264_CPU_ALTIVEC )
3472     {
3473         x264_pixel_altivec_init( pixf );
3474     }
3475 #endif
3476+#if !X264_HIGH_BIT_DEPTH
3477 #if ARCH_UltraSparc
3478     INIT4( sad, _vis );
3479     INIT4( sad_x3, _vis );
3480     INIT4( sad_x4, _vis );
3481 #endif
3482+#endif // !X264_HIGH_BIT_DEPTH
3483 
3484     pixf->ads[PIXEL_8x16] =
3485     pixf->ads[PIXEL_8x4] =
3486diff --git a/common/ppc/dct.c b/common/ppc/dct.c
3487index eb223ae..85d5ce7 100644
3488--- a/common/ppc/dct.c
3489+++ b/common/ppc/dct.c
3490@@ -24,6 +24,7 @@
3491 #include "common/common.h"
3492 #include "ppccommon.h"
3493 
3494+#if !X264_HIGH_BIT_DEPTH
3495 #define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \
3496     b1 = vec_add( a0, a3 );              \
3497     b3 = vec_add( a1, a2 );              \
3498@@ -482,4 +483,5 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
3499     vec_st( tmp0v, 0x00, level );
3500     vec_st( tmp1v, 0x10, level );
3501 }
3502+#endif // !X264_HIGH_BIT_DEPTH
3503 
3504diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
3505index 0c8d2d4..986710d 100644
3506--- a/common/ppc/deblock.c
3507+++ b/common/ppc/deblock.c
3508@@ -21,6 +21,7 @@
3509 #include "common/common.h"
3510 #include "ppccommon.h"
3511 
3512+#if !X264_HIGH_BIT_DEPTH
3513 #define transpose4x16(r0, r1, r2, r3)        \
3514 {                                            \
3515     register vec_u8_t r4;                    \
3516@@ -292,3 +293,4 @@ void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta,
3517     transpose4x16(line1, line2, line3, line4);
3518     write16x4(pix-2, stride, line1, line2, line3, line4);
3519 }
3520+#endif // !X264_HIGH_BIT_DEPTH
3521diff --git a/common/ppc/mc.c b/common/ppc/mc.c
3522index 7ad8050..744a804 100644
3523--- a/common/ppc/mc.c
3524+++ b/common/ppc/mc.c
3525@@ -33,6 +33,7 @@
3526 #include "mc.h"
3527 #include "ppccommon.h"
3528 
3529+#if !X264_HIGH_BIT_DEPTH
3530 typedef void (*pf_mc_t)( uint8_t *src, int i_src,
3531                          uint8_t *dst, int i_dst, int i_height );
3532 
3533@@ -792,9 +793,11 @@ static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_
3534         dstc += dst_stride;
3535     }
3536 }
3537+#endif // !X264_HIGH_BIT_DEPTH
3538 
3539 void x264_mc_altivec_init( x264_mc_functions_t *pf )
3540 {
3541+#if !X264_HIGH_BIT_DEPTH
3542     pf->mc_luma   = mc_luma_altivec;
3543     pf->get_ref   = get_ref_altivec;
3544     pf->mc_chroma = mc_chroma_altivec;
3545@@ -804,4 +807,5 @@ void x264_mc_altivec_init( x264_mc_functions_t *pf )
3546 
3547     pf->hpel_filter = x264_hpel_filter_altivec;
3548     pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
3549+#endif // !X264_HIGH_BIT_DEPTH
3550 }
3551diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
3552index 3f99606..bd5f547 100644
3553--- a/common/ppc/pixel.c
3554+++ b/common/ppc/pixel.c
3555@@ -24,6 +24,7 @@
3556 #include "common/common.h"
3557 #include "ppccommon.h"
3558 
3559+#if !X264_HIGH_BIT_DEPTH
3560 /***********************************************************************
3561  * SAD routines
3562  **********************************************************************/
3563@@ -1979,12 +1980,14 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
3564     sums[0][3] = temp[0];
3565     sums[1][3] = temp[1];
3566 }
3567+#endif // !X264_HIGH_BIT_DEPTH
3568 
3569 /****************************************************************************
3570  * x264_pixel_init:
3571  ****************************************************************************/
3572 void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
3573 {
3574+#if !X264_HIGH_BIT_DEPTH
3575     pixf->sad[PIXEL_16x16]  = pixel_sad_16x16_altivec;
3576     pixf->sad[PIXEL_8x16]   = pixel_sad_8x16_altivec;
3577     pixf->sad[PIXEL_16x8]   = pixel_sad_16x8_altivec;
3578@@ -2023,4 +2026,5 @@ void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
3579     pixf->hadamard_ac[PIXEL_8x8]   = x264_pixel_hadamard_ac_8x8_altivec;
3580 
3581     pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec;
3582+#endif // !X264_HIGH_BIT_DEPTH
3583 }
3584diff --git a/common/ppc/predict.c b/common/ppc/predict.c
3585index 3fb1a2b..c71dbb5 100644
3586--- a/common/ppc/predict.c
3587+++ b/common/ppc/predict.c
3588@@ -23,6 +23,7 @@
3589 #include "pixel.h"
3590 #include "ppccommon.h"
3591 
3592+#if !X264_HIGH_BIT_DEPTH
3593 static void predict_8x8c_p_altivec( uint8_t *src )
3594 {
3595     int H = 0, V = 0;
3596@@ -194,6 +195,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
3597         src += FDEC_STRIDE;
3598     }
3599 }
3600+#endif // !X264_HIGH_BIT_DEPTH
3601 
3602 
3603 /****************************************************************************
3604@@ -201,6 +203,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
3605  ****************************************************************************/
3606 void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
3607 {
3608+#if !X264_HIGH_BIT_DEPTH
3609     pf[I_PRED_16x16_V ]      = predict_16x16_v_altivec;
3610     pf[I_PRED_16x16_H ]      = predict_16x16_h_altivec;
3611     pf[I_PRED_16x16_DC]      = predict_16x16_dc_altivec;
3612@@ -208,9 +211,12 @@ void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
3613     pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_altivec;
3614     pf[I_PRED_16x16_DC_TOP ] = predict_16x16_dc_top_altivec;
3615     pf[I_PRED_16x16_DC_128 ] = predict_16x16_dc_128_altivec;
3616+#endif // !X264_HIGH_BIT_DEPTH
3617 }
3618 
3619 void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] )
3620 {
3621+#if !X264_HIGH_BIT_DEPTH
3622     pf[I_PRED_CHROMA_P]       = predict_8x8c_p_altivec;
3623+#endif // !X264_HIGH_BIT_DEPTH
3624 }
3625diff --git a/common/ppc/quant.c b/common/ppc/quant.c
3626index 6f41a06..ffd6a1b 100644
3627--- a/common/ppc/quant.c
3628+++ b/common/ppc/quant.c
3629@@ -22,6 +22,7 @@
3630 #include "ppccommon.h"
3631 #include "quant.h"
3632 
3633+#if !X264_HIGH_BIT_DEPTH
3634 // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
3635 #define QUANT_16_U( idx0, idx1 )                                    \
3636 {                                                                   \
3637@@ -360,4 +361,5 @@ void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i
3638             DEQUANT_SHR();
3639     }
3640 }
3641+#endif // !X264_HIGH_BIT_DEPTH
3642 
3643diff --git a/common/predict.c b/common/predict.c
3644index 79ec1fc..dc92083 100644
3645--- a/common/predict.c
3646+++ b/common/predict.c
3647@@ -53,40 +53,40 @@
3648 
3649 void x264_predict_16x16_dc_c( pixel *src )
3650 {
3651-    pixel4 dc = 0;
3652+    int dc = 0;
3653 
3654     for( int i = 0; i < 16; i++ )
3655     {
3656         dc += src[-1 + i * FDEC_STRIDE];
3657         dc += src[i - FDEC_STRIDE];
3658     }
3659-    dc = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
3660+    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
3661 
3662-    PREDICT_16x16_DC( dc );
3663+    PREDICT_16x16_DC( dcsplat );
3664 }
3665 static void x264_predict_16x16_dc_left_c( pixel *src )
3666 {
3667-    pixel4 dc = 0;
3668+    int dc = 0;
3669 
3670     for( int i = 0; i < 16; i++ )
3671         dc += src[-1 + i * FDEC_STRIDE];
3672-    dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3673+    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3674 
3675-    PREDICT_16x16_DC( dc );
3676+    PREDICT_16x16_DC( dcsplat );
3677 }
3678 static void x264_predict_16x16_dc_top_c( pixel *src )
3679 {
3680-    pixel4 dc = 0;
3681+    int dc = 0;
3682 
3683     for( int i = 0; i < 16; i++ )
3684         dc += src[i - FDEC_STRIDE];
3685-    dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3686+    pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3687 
3688-    PREDICT_16x16_DC( dc );
3689+    PREDICT_16x16_DC( dcsplat );
3690 }
3691 static void x264_predict_16x16_dc_128_c( pixel *src )
3692 {
3693-    PREDICT_16x16_DC( PIXEL_SPLAT_X4( 0x80 ) );
3694+    PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
3695 }
3696 void x264_predict_16x16_h_c( pixel *src )
3697 {
3698@@ -155,53 +155,53 @@ static void x264_predict_8x8c_dc_128_c( pixel *src )
3699 {
3700     for( int y = 0; y < 8; y++ )
3701     {
3702-        MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 0x80 );
3703-        MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 0x80 );
3704+        MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
3705+        MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
3706         src += FDEC_STRIDE;
3707     }
3708 }
3709 static void x264_predict_8x8c_dc_left_c( pixel *src )
3710 {
3711-    pixel4 dc0 = 0, dc1 = 0;
3712+    int dc0 = 0, dc1 = 0;
3713 
3714     for( int y = 0; y < 4; y++ )
3715     {
3716         dc0 += src[y * FDEC_STRIDE     - 1];
3717         dc1 += src[(y+4) * FDEC_STRIDE - 1];
3718     }
3719-    dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3720-    dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3721+    pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3722+    pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3723 
3724     for( int y = 0; y < 4; y++ )
3725     {
3726-        MPIXEL_X4( src+0 ) = dc0;
3727-        MPIXEL_X4( src+4 ) = dc0;
3728+        MPIXEL_X4( src+0 ) = dc0splat;
3729+        MPIXEL_X4( src+4 ) = dc0splat;
3730         src += FDEC_STRIDE;
3731     }
3732     for( int y = 0; y < 4; y++ )
3733     {
3734-        MPIXEL_X4( src+0 ) = dc1;
3735-        MPIXEL_X4( src+4 ) = dc1;
3736+        MPIXEL_X4( src+0 ) = dc1splat;
3737+        MPIXEL_X4( src+4 ) = dc1splat;
3738         src += FDEC_STRIDE;
3739     }
3740 
3741 }
3742 static void x264_predict_8x8c_dc_top_c( pixel *src )
3743 {
3744-    pixel4 dc0 = 0, dc1 = 0;
3745+    int dc0 = 0, dc1 = 0;
3746 
3747     for( int x = 0; x < 4; x++ )
3748     {
3749         dc0 += src[x     - FDEC_STRIDE];
3750         dc1 += src[x + 4 - FDEC_STRIDE];
3751     }
3752-    dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3753-    dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3754+    pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3755+    pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3756 
3757     for( int y = 0; y < 8; y++ )
3758     {
3759-        MPIXEL_X4( src+0 ) = dc0;
3760-        MPIXEL_X4( src+4 ) = dc1;
3761+        MPIXEL_X4( src+0 ) = dc0splat;
3762+        MPIXEL_X4( src+4 ) = dc1splat;
3763         src += FDEC_STRIDE;
3764     }
3765 }
3766@@ -306,7 +306,7 @@ static void x264_predict_8x8c_p_c( pixel *src )
3767 
3768 static void x264_predict_4x4_dc_128_c( pixel *src )
3769 {
3770-    PREDICT_4x4_DC( PIXEL_SPLAT_X4( 0x80 ) );
3771+    PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
3772 }
3773 static void x264_predict_4x4_dc_left_c( pixel *src )
3774 {
3775@@ -491,7 +491,8 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
3776             }
3777             else
3778             {
3779-                M64( edge+24 ) = SRC(7,-1) * 0x0101010101010101ULL;
3780+                MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
3781+                MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
3782                 edge[32] = SRC(7,-1);
3783             }
3784         }
3785@@ -523,7 +524,7 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
3786 
3787 static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[33] )
3788 {
3789-    PREDICT_8x8_DC( PIXEL_SPLAT_X4( 0x80 ) );
3790+    PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
3791 }
3792 static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[33] )
3793 {
3794@@ -554,9 +555,13 @@ void x264_predict_8x8_h_c( pixel *src, pixel edge[33] )
3795 }
3796 void x264_predict_8x8_v_c( pixel *src, pixel edge[33] )
3797 {
3798-    uint64_t top = M64( edge+16 );
3799+    pixel4 top[2] = { MPIXEL_X4( edge+16 ),
3800+                      MPIXEL_X4( edge+20 ) };
3801     for( int y = 0; y < 8; y++ )
3802-        M64( src+y*FDEC_STRIDE ) = top;
3803+    {
3804+        MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top[0];
3805+        MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1];
3806+    }
3807 }
3808 static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[33] )
3809 {
3810diff --git a/common/quant.c b/common/quant.c
3811index ece52f9..a7b72cf 100644
3812--- a/common/quant.c
3813+++ b/common/quant.c
3814@@ -142,7 +142,7 @@ static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, uint16_t *offset, int
3815     for( int i = 1; i < size; i++ )
3816     {
3817         int level = dct[i];
3818-        int sign = level>>15;
3819+        int sign = level>>31;
3820         level = (level+sign)^sign;
3821         sum[i] += level;
3822         level -= offset[i];
3823@@ -177,10 +177,7 @@ static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
3824     int i_score = 0;
3825     int idx = i_max - 1;
3826 
3827-    /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned.  idx>=0 instead of 1 works correctly for the same reason */
3828-    while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 )
3829-        idx -= 2;
3830-    if( idx >= 0 && dct[idx] == 0 )
3831+    while( idx >= 0 && dct[idx] == 0 )
3832         idx--;
3833     while( idx >= 0 )
3834     {
3835@@ -216,10 +213,7 @@ static int x264_decimate_score64( dctcoef *dct )
3836 
3837 static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count )
3838 {
3839-    int i_last;
3840-    for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
3841-        if( M64( l+i_last-3 ) )
3842-            break;
3843+    int i_last = i_count-1;
3844     while( i_last >= 0 && l[i_last] == 0 )
3845         i_last--;
3846     return i_last;
3847@@ -287,6 +281,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
3848     pf->coeff_level_run[  DCT_LUMA_AC] = x264_coeff_level_run15;
3849     pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
3850 
3851+#if !X264_HIGH_BIT_DEPTH
3852 #if HAVE_MMX
3853     if( cpu&X264_CPU_MMX )
3854     {
3855@@ -425,6 +420,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
3856         pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
3857     }
3858 #endif
3859+#endif // !X264_HIGH_BIT_DEPTH
3860     pf->coeff_last[  DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
3861     pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
3862     pf->coeff_level_run[  DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
3863diff --git a/common/set.c b/common/set.c
3864index 16cff8e..86f3854 100644
3865--- a/common/set.c
3866+++ b/common/set.c
3867@@ -78,6 +78,7 @@ int x264_cqm_init( x264_t *h )
3868                         32 - 11, 32 - 21 };
3869     int max_qp_err = -1;
3870     int max_chroma_qp_err = -1;
3871+    int min_qp_err = QP_MAX+1;
3872 
3873     for( int i = 0; i < 6; i++ )
3874     {
3875@@ -94,9 +95,9 @@ int x264_cqm_init( x264_t *h )
3876         }
3877         else
3878         {
3879-            CHECKED_MALLOC( h->  quant4_mf[i], 52*size*sizeof(uint16_t) );
3880+            CHECKED_MALLOC( h->  quant4_mf[i], (QP_MAX+1)*size*sizeof(uint16_t) );
3881             CHECKED_MALLOC( h->dequant4_mf[i],  6*size*sizeof(int) );
3882-            CHECKED_MALLOC( h->unquant4_mf[i], 52*size*sizeof(int) );
3883+            CHECKED_MALLOC( h->unquant4_mf[i], (QP_MAX+1)*size*sizeof(int) );
3884         }
3885 
3886         for( j = (i<4 ? 0 : 4); j < i; j++ )
3887@@ -106,7 +107,7 @@ int x264_cqm_init( x264_t *h )
3888         if( j < i )
3889             h->quant4_bias[i] = h->quant4_bias[j];
3890         else
3891-            CHECKED_MALLOC( h->quant4_bias[i], 52*size*sizeof(uint16_t) );
3892+            CHECKED_MALLOC( h->quant4_bias[i], (QP_MAX+1)*size*sizeof(uint16_t) );
3893     }
3894 
3895     for( int q = 0; q < 6; q++ )
3896@@ -140,7 +141,7 @@ int x264_cqm_init( x264_t *h )
3897                      quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
3898             }
3899     }
3900-    for( int q = 0; q < 52; q++ )
3901+    for( int q = 0; q < QP_MAX+1; q++ )
3902     {
3903         int j;
3904         for( int i_list = 0; i_list < 4; i_list++ )
3905@@ -148,6 +149,11 @@ int x264_cqm_init( x264_t *h )
3906             {
3907                 h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
3908                 h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
3909+                if( !j )
3910+                {
3911+                    min_qp_err = X264_MIN( min_qp_err, q );
3912+                    continue;
3913+                }
3914                 // round to nearest, unless that would cause the deadzone to be negative
3915                 h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
3916                 if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
3917@@ -161,6 +167,11 @@ int x264_cqm_init( x264_t *h )
3918                 {
3919                     h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
3920                     h->quant8_mf[i_list][q][i] = j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
3921+                    if( !j )
3922+                    {
3923+                        min_qp_err = X264_MIN( min_qp_err, q );
3924+                        continue;
3925+                    }
3926                     h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
3927                     if( j > 0xffff && q > max_qp_err )
3928                         max_qp_err = q;
3929@@ -179,6 +190,12 @@ int x264_cqm_init( x264_t *h )
3930         x264_log( h, X264_LOG_ERROR, "but min chroma QP is implied to be %d.\n", h->chroma_qp_table[h->param.rc.i_qp_min] );
3931         return -1;
3932     }
3933+    if( !h->mb.b_lossless && min_qp_err <= h->param.rc.i_qp_max )
3934+    {
3935+        x264_log( h, X264_LOG_ERROR, "Quantization underflow.  Your CQM is incompatible with QP > %d,\n", min_qp_err-1 );
3936+        x264_log( h, X264_LOG_ERROR, "but max QP is implied to be %d.\n", h->param.rc.i_qp_max );
3937+        return -1;
3938+    }
3939     return 0;
3940 fail:
3941     x264_cqm_delete( h );
3942diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
3943index 2dcd671..4bb5f33 100644
3944--- a/common/x86/mc-c.c
3945+++ b/common/x86/mc-c.c
3946@@ -125,6 +125,7 @@ PIXEL_AVG_WALL(sse2)
3947 PIXEL_AVG_WALL(sse2_misalign)
3948 PIXEL_AVG_WALL(cache64_ssse3)
3949 
3950+#if !X264_HIGH_BIT_DEPTH
3951 #define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
3952 static void (* const x264_pixel_avg_wtab_##instr[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =\
3953 {\
3954@@ -355,24 +356,28 @@ static void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i
3955         x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 );
3956     }
3957 }
3958+#endif // !X264_HIGH_BIT_DEPTH
3959 
3960 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
3961 {
3962     if( !(cpu&X264_CPU_MMX) )
3963         return;
3964 
3965+    pf->memcpy_aligned = x264_memcpy_aligned_mmx;
3966+    pf->memzero_aligned = x264_memzero_aligned_mmx;
3967+#if !X264_HIGH_BIT_DEPTH
3968     pf->copy_16x16_unaligned = x264_mc_copy_w16_mmx;
3969     pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
3970     pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_mmx;
3971     pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_mmx;
3972-    pf->memcpy_aligned = x264_memcpy_aligned_mmx;
3973-    pf->memzero_aligned = x264_memzero_aligned_mmx;
3974     pf->integral_init4v = x264_integral_init4v_mmx;
3975     pf->integral_init8v = x264_integral_init8v_mmx;
3976+#endif // !X264_HIGH_BIT_DEPTH
3977 
3978     if( !(cpu&X264_CPU_MMXEXT) )
3979         return;
3980 
3981+#if !X264_HIGH_BIT_DEPTH
3982     pf->mc_luma = mc_luma_mmxext;
3983     pf->get_ref = get_ref_mmxext;
3984     pf->mc_chroma = x264_mc_chroma_mmxext;
3985@@ -412,12 +417,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
3986         pf->frame_init_lowres_core = x264_frame_init_lowres_core_cache32_mmxext;
3987     }
3988 #endif
3989+#endif // !X264_HIGH_BIT_DEPTH
3990 
3991     if( !(cpu&X264_CPU_SSE2) )
3992         return;
3993 
3994     pf->memcpy_aligned = x264_memcpy_aligned_sse2;
3995     pf->memzero_aligned = x264_memzero_aligned_sse2;
3996+#if !X264_HIGH_BIT_DEPTH
3997     pf->integral_init4v = x264_integral_init4v_sse2;
3998     pf->integral_init8v = x264_integral_init8v_sse2;
3999     pf->hpel_filter = x264_hpel_filter_sse2_amd;
4000@@ -492,4 +499,5 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
4001 
4002     pf->integral_init4h = x264_integral_init4h_sse4;
4003     pf->integral_init8h = x264_integral_init8h_sse4;
4004+#endif // !X264_HIGH_BIT_DEPTH
4005 }
4006diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
4007index e771431..4004265 100644
4008--- a/common/x86/predict-c.c
4009+++ b/common/x86/predict-c.c
4010@@ -75,6 +75,7 @@
4011  void x264_predict_16x16_v_sse2( uint8_t *src );
4012  void x264_predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
4013 
4014+#if !X264_HIGH_BIT_DEPTH
4015 ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
4016 ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
4017 ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
4018@@ -364,6 +365,7 @@ INTRA_SA8D_X3(ssse3)
4019 #else
4020 INTRA_SA8D_X3(mmxext)
4021 #endif
4022+#endif // !X264_HIGH_BIT_DEPTH
4023 
4024 /****************************************************************************
4025  * Exported functions:
4026@@ -372,6 +374,7 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
4027 {
4028     if( !(cpu&X264_CPU_MMX) )
4029         return;
4030+#if !X264_HIGH_BIT_DEPTH
4031     pf[I_PRED_16x16_V]       = x264_predict_16x16_v_mmx;
4032     if( !(cpu&X264_CPU_MMXEXT) )
4033         return;
4034@@ -397,12 +400,14 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
4035 #ifdef __GNUC__
4036     pf[I_PRED_16x16_P]       = x264_predict_16x16_p_ssse3;
4037 #endif
4038+#endif // !X264_HIGH_BIT_DEPTH
4039 }
4040 
4041 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
4042 {
4043     if( !(cpu&X264_CPU_MMX) )
4044         return;
4045+#if !X264_HIGH_BIT_DEPTH
4046 #if ARCH_X86_64
4047     pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
4048 #endif
4049@@ -424,12 +429,14 @@ void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
4050 #ifdef __GNUC__
4051     pf[I_PRED_CHROMA_P]       = x264_predict_8x8c_p_ssse3;
4052 #endif
4053+#endif // !X264_HIGH_BIT_DEPTH
4054 }
4055 
4056 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
4057 {
4058     if( !(cpu&X264_CPU_MMXEXT) )
4059         return;
4060+#if !X264_HIGH_BIT_DEPTH
4061     pf[I_PRED_8x8_V]      = x264_predict_8x8_v_mmxext;
4062     pf[I_PRED_8x8_H]      = x264_predict_8x8_h_mmxext;
4063     pf[I_PRED_8x8_DC]     = x264_predict_8x8_dc_mmxext;
4064@@ -456,12 +463,14 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
4065     pf[I_PRED_8x8_HD]   = x264_predict_8x8_hd_ssse3;
4066     pf[I_PRED_8x8_HU]   = x264_predict_8x8_hu_ssse3;
4067     *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
4068+#endif // !X264_HIGH_BIT_DEPTH
4069 }
4070 
4071 void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
4072 {
4073     if( !(cpu&X264_CPU_MMXEXT) )
4074         return;
4075+#if !X264_HIGH_BIT_DEPTH
4076     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_mmxext;
4077     pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
4078     pf[I_PRED_4x4_VL]  = x264_predict_4x4_vl_mmxext;
4079@@ -474,4 +483,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
4080     pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
4081     pf[I_PRED_4x4_VR]  = x264_predict_4x4_vr_ssse3;
4082     pf[I_PRED_4x4_HD]  = x264_predict_4x4_hd_ssse3;
4083+#endif // !X264_HIGH_BIT_DEPTH
4084 }
4085diff --git a/configure b/configure
4086index 24d15ad..43fbe39 100755
4087--- a/configure
4088+++ b/configure
4089@@ -18,6 +18,7 @@ echo "  --enable-gprof           adds -pg, doesn't strip"
4090 echo "  --enable-visualize       enables visualization (X11 only)"
4091 echo "  --enable-pic             build position-independent code"
4092 echo "  --enable-shared          build libx264.so"
4093+echo "  --bit-depth=BIT_DEPTH    sets output bit depth (8-10), default 8"
4094 echo "  --extra-asflags=EASFLAGS add EASFLAGS to ASFLAGS"
4095 echo "  --extra-cflags=ECFLAGS   add ECFLAGS to CFLAGS"
4096 echo "  --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS"
4097@@ -124,6 +125,7 @@ gprof="no"
4098 pic="no"
4099 vis="no"
4100 shared="no"
4101+bit_depth="8"
4102 
4103 CFLAGS="$CFLAGS -Wall -I."
4104 LDFLAGS="$LDFLAGS"
4105@@ -208,6 +210,14 @@ for opt do
4106             CFLAGS="$CFLAGS --sysroot=${opt#--sysroot=}"
4107             LDFLAGS="$LDFLAGS --sysroot=${opt#--sysroot=}"
4108             ;;
4109+        --bit-depth=*)
4110+            bit_depth="${opt#--bit-depth=}"
4111+            if [ "$bit_depth" -lt "8" -o "$bit_depth" -gt "10" ]; then
4112+                echo "Supplied bit depth must be in range [8,10]."
4113+                exit 1
4114+            fi
4115+            bit_depth=`expr $bit_depth + 0`
4116+            ;;
4117         *)
4118             echo "Unknown option $opt, ignored"
4119             ;;
4120@@ -644,6 +654,12 @@ if cc_check '' -Wshadow ; then
4121     CFLAGS="-Wshadow $CFLAGS"
4122 fi
4123 
4124+if [ "$bit_depth" -gt "8" ]; then
4125+    define X264_HIGH_BIT_DEPTH
4126+fi
4127+
4128+define BIT_DEPTH $bit_depth
4129+
4130 rm -f conftest*
4131 
4132 # generate config files
4133@@ -724,6 +740,7 @@ gprof:      $gprof
4134 PIC:        $pic
4135 shared:     $shared
4136 visualize:  $vis
4137+bit depth:  $bit_depth
4138 EOF
4139 
4140 echo >> config.log
4141diff --git a/encoder/analyse.c b/encoder/analyse.c
4142index cdbdd1e..93f7eed 100644
4143--- a/encoder/analyse.c
4144+++ b/encoder/analyse.c
4145@@ -134,25 +134,27 @@ typedef struct
4146 } x264_mb_analysis_t;
4147 
4148 /* lambda = pow(2,qp/6-2) */
4149-const uint8_t x264_lambda_tab[52] = {
4150-   1, 1, 1, 1, 1, 1, 1, 1,  /*  0-7 */
4151-   1, 1, 1, 1,              /*  8-11 */
4152-   1, 1, 1, 1, 2, 2, 2, 2,  /* 12-19 */
4153-   3, 3, 3, 4, 4, 4, 5, 6,  /* 20-27 */
4154-   6, 7, 8, 9,10,11,13,14,  /* 28-35 */
4155-  16,18,20,23,25,29,32,36,  /* 36-43 */
4156-  40,45,51,57,64,72,81,91   /* 44-51 */
4157+const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
4158+   1,   1,   1,   1,   1,   1,   1,   1, /*  0- 7 */
4159+   1,   1,   1,   1,   1,   1,   1,   1, /*  8-15 */
4160+   2,   2,   2,   2,   3,   3,   3,   4, /* 16-23 */
4161+   4,   4,   5,   6,   6,   7,   8,   9, /* 24-31 */
4162+  10,  11,  13,  14,  16,  18,  20,  23, /* 32-39 */
4163+  25,  29,  32,  36,  40,  45,  51,  57, /* 40-47 */
4164+  64,  72,  81,  91, 102, 114, 128, 144, /* 48-55 */
4165+ 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
4166 };
4167 
4168 /* lambda2 = pow(lambda,2) * .9 * 256 */
4169-const int x264_lambda2_tab[52] = {
4170-    14,      18,      22,      28,     36,     45,     57,     72, /*  0 -  7 */
4171-    91,     115,     145,     182,    230,    290,    365,    460, /*  8 - 15 */
4172-   580,     731,     921,    1161,   1462,   1843,   2322,   2925, /* 16 - 23 */
4173-  3686,    4644,    5851,    7372,   9289,  11703,  14745,  18578, /* 24 - 31 */
4174- 23407,   29491,   37156,   46814,  58982,  74313,  93628, 117964, /* 32 - 39 */
4175-148626,  187257,  235929,  297252, 374514, 471859, 594505, 749029, /* 40 - 47 */
4176-943718, 1189010, 1498059, 1887436                                  /* 48 - 51 */
4177+const int x264_lambda2_tab[QP_MAX_MAX+1] = {
4178+     14,     18,     22,      28,      36,      45,      57,      72, /*  0- 7 */
4179+     91,    115,    145,     182,     230,     290,     365,     460, /*  8-15 */
4180+    580,    731,    921,    1161,    1462,    1843,    2322,    2925, /* 16-23 */
4181+   3686,   4644,   5851,    7372,    9289,   11703,   14745,   18578, /* 24-31 */
4182+  23407,  29491,  37156,   46814,   58982,   74313,   93628,  117964, /* 32-39 */
4183+ 148626, 187257, 235929,  297252,  374514,  471859,  594505,  749029, /* 40-47 */
4184+ 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
4185+5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
4186 };
4187 
4188 const uint8_t x264_exp2_lut[64] = {
4189@@ -188,27 +190,31 @@ const float x264_log2_lz_lut[32] = {
4190 
4191 // should the intra and inter lambdas be different?
4192 // I'm just matching the behaviour of deadzone quant.
4193-static const int x264_trellis_lambda2_tab[2][52] = {
4194+static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
4195     // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
4196-    {    46,      58,      73,      92,     117,     147,
4197-        185,     233,     294,     370,     466,     587,
4198-        740,     932,    1174,    1480,    1864,    2349,
4199-       2959,    3728,    4697,    5918,    7457,    9395,
4200-      11837,   14914,   18790,   23674,   29828,   37581,
4201-      47349,   59656,   75163,   94699,  119313,  150326,
4202-     189399,  238627,  300652,  378798,  477255,  601304,
4203-     757596,  954511, 1202608, 1515192, 1909022, 2405217,
4204-    3030384, 3818045, 4810435, 6060769 },
4205+    {      46,      58,      73,      92,     117,     147,
4206+          185,     233,     294,     370,     466,     587,
4207+          740,     932,    1174,    1480,    1864,    2349,
4208+         2959,    3728,    4697,    5918,    7457,    9395,
4209+        11837,   14914,   18790,   23674,   29828,   37581,
4210+        47349,   59656,   75163,   94699,  119313,  150326,
4211+       189399,  238627,  300652,  378798,  477255,  601304,
4212+       757596,  954511, 1202608, 1515192, 1909022, 2405217,
4213+      3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
4214+     12121539,15272182,19241743,24243077,30544363,38483486,
4215+     48486154,61088726,76966972,96972308 },
4216     // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
4217-    {    27,      34,      43,      54,      68,      86,
4218-        108,     136,     172,     216,     273,     343,
4219-        433,     545,     687,     865,    1090,    1374,
4220-       1731,    2180,    2747,    3461,    4361,    5494,
4221-       6922,    8721,   10988,   13844,   17442,   21976,
4222-      27688,   34885,   43953,   55377,   69771,   87906,
4223-     110755,  139543,  175813,  221511,  279087,  351627,
4224-     443023,  558174,  703255,  886046, 1116348, 1406511,
4225-    1772093, 2232697, 2813022, 3544186 }
4226+    {      27,      34,      43,      54,      68,      86,
4227+          108,     136,     172,     216,     273,     343,
4228+          433,     545,     687,     865,    1090,    1374,
4229+         1731,    2180,    2747,    3461,    4361,    5494,
4230+         6922,    8721,   10988,   13844,   17442,   21976,
4231+        27688,   34885,   43953,   55377,   69771,   87906,
4232+       110755,  139543,  175813,  221511,  279087,  351627,
4233+       443023,  558174,  703255,  886046, 1116348, 1406511,
4234+      1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
4235+      7088374, 8930791,11252092,14176748,17861583,22504184,
4236+     28353495,35723165,45008368,56706990 }
4237 };
4238 
4239 static const uint16_t x264_chroma_lambda2_offset_tab[] = {
4240@@ -237,7 +243,7 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
4241 
4242 static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
4243 
4244-static uint16_t x264_cost_ref[92][3][33];
4245+static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
4246 static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
4247 
4248 int x264_analyse_init_costs( x264_t *h, int qp )
4249@@ -275,7 +281,7 @@ fail:
4250 
4251 void x264_analyse_free_costs( x264_t *h )
4252 {
4253-    for( int i = 0; i < 92; i++ )
4254+    for( int i = 0; i < LAMBDA_MAX+1; i++ )
4255     {
4256         if( h->cost_mv[i] )
4257             x264_free( h->cost_mv[i] - 2*4*2048 );
4258diff --git a/encoder/cabac.c b/encoder/cabac.c
4259index 8bd40f1..e82d7e9 100644
4260--- a/encoder/cabac.c
4261+++ b/encoder/cabac.c
4262@@ -262,9 +262,9 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
4263     if( i_dqp != 0 )
4264     {
4265         int val = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1);
4266-        /* dqp is interpreted modulo 52 */
4267-        if( val >= 51 && val != 52 )
4268-            val = 103 - val;
4269+        /* dqp is interpreted modulo (QP_MAX+1) */
4270+        if( val >= QP_MAX && val != QP_MAX+1 )
4271+            val = 2*QP_MAX+1 - val;
4272         do
4273         {
4274             x264_cabac_encode_decision( cb, 60 + ctx, 1 );
4275@@ -767,15 +767,18 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
4276         i_mb_pos_tex = x264_cabac_pos( cb );
4277         h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
4278 
4279-        memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
4280-        cb->p += 256;
4281-        for( int i = 0; i < 8; i++ )
4282-            memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
4283-        cb->p += 64;
4284-        for( int i = 0; i < 8; i++ )
4285-            memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
4286-        cb->p += 64;
4287+        bs_t s;
4288+        bs_init( &s, cb->p, cb->p_end - cb->p );
4289 
4290+        for( int i = 0; i < 256; i++ )
4291+            bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
4292+        for( int ch = 1; ch < 3; ch++ ) /* 8x8 blocks of planes 1 and 2; plane 0 was written above */
4293+            for( int i = 0; i < 8; i++ )
4294+                for( int j = 0; j < 8; j++ )
4295+                    bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
4296+
4297+        bs_flush( &s );
4298+        cb->p = s.p;
4299         x264_cabac_encode_init_core( cb );
4300 
4301         h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
4302diff --git a/encoder/cavlc.c b/encoder/cavlc.c
4303index e2f60b1..632ed41 100644
4304--- a/encoder/cavlc.c
4305+++ b/encoder/cavlc.c
4306@@ -66,7 +66,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
4307     bs_t *s = &h->out.bs;
4308     static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
4309     int i_level_prefix = 15;
4310-    int mask = level >> 15;
4311+    int mask = level >> 31;
4312     int abs_level = (level^mask)-mask;
4313     int i_level_code = abs_level*2-mask-2;
4314     if( ( i_level_code >> i_suffix_length ) < 15 )
4315@@ -219,10 +219,10 @@ static void cavlc_qp_delta( x264_t *h )
4316 
4317     if( i_dqp )
4318     {
4319-        if( i_dqp < -26 )
4320-            i_dqp += 52;
4321-        else if( i_dqp > 25 )
4322-            i_dqp -= 52;
4323+        if( i_dqp < -(QP_MAX+1)/2 )
4324+            i_dqp += QP_MAX+1;
4325+        else if( i_dqp > QP_MAX/2 )
4326+            i_dqp -= QP_MAX+1;
4327     }
4328     bs_write_se( s, i_dqp );
4329 }
4330@@ -309,14 +309,12 @@ void x264_macroblock_write_cavlc( x264_t *h )
4331 
4332         bs_align_0( s );
4333 
4334-        memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
4335-        s->p += 256;
4336-        for( int i = 0; i < 8; i++ )
4337-            memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
4338-        s->p += 64;
4339-        for( int i = 0; i < 8; i++ )
4340-            memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
4341-        s->p += 64;
4342+        for( int i = 0; i < 256; i++ )
4343+            bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
4344+        for( int ch = 1; ch < 3; ch++ ) /* 8x8 blocks of planes 1 and 2; plane 0 was written above */
4345+            for( int i = 0; i < 8; i++ )
4346+                for( int j = 0; j < 8; j++ )
4347+                    bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
4348 
4349         bs_init( s, s->p, s->p_end - s->p );
4350         s->p_start = p_start;
4351diff --git a/encoder/encoder.c b/encoder/encoder.c
4352index 31cb84a..f7e0e38 100644
4353--- a/encoder/encoder.c
4354+++ b/encoder/encoder.c
4355@@ -51,7 +51,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
4356  ****************************************************************************/
4357 static float x264_psnr( int64_t i_sqe, int64_t i_size )
4358 {
4359-    double f_mse = (double)i_sqe / ((double)65025.0 * (double)i_size);
4360+    double f_mse = (double)i_sqe / (PIXEL_MAX*PIXEL_MAX * (double)i_size);
4361     if( f_mse <= 0.0000000001 ) /* Max 100dB */
4362         return 100;
4363 
4364@@ -68,11 +68,13 @@ static void x264_frame_dump( x264_t *h )
4365     FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
4366     if( !f )
4367         return;
4368+    int bytes_per_pixel = (BIT_DEPTH+7)/8;
4369     /* Write the frame in display order */
4370-    fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2, SEEK_SET );
4371+    fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2 * bytes_per_pixel, SEEK_SET );
4372     for( int i = 0; i < h->fdec->i_plane; i++ )
4373         for( int y = 0; y < h->param.i_height >> !!i; y++ )
4374-            fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f );
4375+            for( int j = 0; j < h->param.i_width >> !!i; j++ )
4376+                fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]]+j, bytes_per_pixel, 1, f );
4377     fclose( f );
4378 }
4379 
4380@@ -469,8 +471,8 @@ static int x264_validate_parameters( x264_t *h )
4381         x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
4382         return -1;
4383     }
4384-    h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, 51 );
4385-    h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
4386+    h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, QP_MAX );
4387+    h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
4388     if( h->param.rc.i_rc_method == X264_RC_CRF )
4389     {
4390         h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
4391@@ -502,12 +504,12 @@ static int x264_validate_parameters( x264_t *h )
4392         float qp_p = h->param.rc.i_qp_constant;
4393         float qp_i = qp_p - 6*log2f( h->param.rc.f_ip_factor );
4394         float qp_b = qp_p + 6*log2f( h->param.rc.f_pb_factor );
4395-        h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
4396-        h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
4397+        h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX );
4398+        h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
4399         h->param.rc.i_aq_mode = 0;
4400         h->param.rc.b_mb_tree = 0;
4401     }
4402-    h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
4403+    h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
4404     h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
4405     if( h->param.rc.i_vbv_buffer_size )
4406     {
4407@@ -1054,8 +1056,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
4408     if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
4409         goto fail;
4410 
4411+    static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
4412     /* Checks for known miscompilation issues. */
4413-    if( h->cost_mv[1][2013] != 24 )
4414+    if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
4415     {
4416         x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
4417         goto fail;
4418@@ -1147,11 +1150,22 @@ x264_t *x264_encoder_open( x264_param_t *param )
4419         fclose( f );
4420     }
4421 
4422-    x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
4423-        h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
4424-        h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
4425-        h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
4426-        "High 4:4:4 Predictive", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
4427+    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
4428+                          h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
4429+                          h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
4430+                          h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
4431+                          "High 4:4:4 Predictive";
4432+
4433+    if( h->sps->i_profile_idc < PROFILE_HIGH10 )
4434+    {
4435+        x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
4436+            profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
4437+    }
4438+    else
4439+    {
4440+        x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d, bit depth %d\n",
4441+            profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10, BIT_DEPTH );
4442+    }
4443 
4444     return h;
4445 fail:
4446@@ -1836,7 +1850,7 @@ static int x264_slice_write( x264_t *h )
4447         bs_align_1( &h->out.bs );
4448 
4449         /* init cabac */
4450-        x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc );
4451+        x264_cabac_context_init( &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
4452         x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
4453     }
4454     h->mb.i_last_qp = h->sh.i_qp;
4455@@ -2705,6 +2719,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
4456     for( int i = 0; i < 3; i++ )
4457     {
4458         pic_out->img.i_stride[i] = h->fdec->i_stride[i];
4459+        // FIXME This breaks the API when pixel != uint8_t.
4460         pic_out->img.plane[i] = h->fdec->plane[i];
4461     }
4462 
4463diff --git a/encoder/macroblock.h b/encoder/macroblock.h
4464index b1b02fa..7c83344 100644
4465--- a/encoder/macroblock.h
4466+++ b/encoder/macroblock.h
4467@@ -26,8 +26,8 @@
4468 
4469 #include "common/macroblock.h"
4470 
4471-extern const int x264_lambda2_tab[52];
4472-extern const uint8_t x264_lambda_tab[52];
4473+extern const int x264_lambda2_tab[QP_MAX_MAX+1];
4474+extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
4475 
4476 void x264_rdo_init( void );
4477 
4478diff --git a/encoder/me.h b/encoder/me.h
4479index 912b05d..b125f3d 100644
4480--- a/encoder/me.h
4481+++ b/encoder/me.h
4482@@ -68,7 +68,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
4483 void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
4484 uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
4485 
4486-extern uint16_t *x264_cost_mv_fpel[92][4];
4487+extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
4488 
4489 #define COPY1_IF_LT(x,y)\
4490 if((y)<(x))\
4491diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
4492index 6fdaa98..a2c4cfe 100644
4493--- a/encoder/ratecontrol.c
4494+++ b/encoder/ratecontrol.c
4495@@ -219,7 +219,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
4496     uint32_t ssd = res >> 32;
4497     frame->i_pixel_sum[i] += sum;
4498     frame->i_pixel_ssd[i] += ssd;
4499-    return ssd - (sum * sum >> shift);
4500+    return ssd - ((uint64_t)sum * sum >> shift);
4501 }
4502 
4503 // Find the total AC energy of the block in all planes.
4504@@ -287,6 +287,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
4505     {
4506         if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
4507         {
4508+            float bit_depth_correction = powf(1 << (BIT_DEPTH-8), 0.5f);
4509             float avg_adj_pow2 = 0.f;
4510             for( int mb_y = 0; mb_y < h->mb.i_mb_height; mb_y++ )
4511                 for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x++ )
4512@@ -299,8 +300,8 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
4513                 }
4514             avg_adj /= h->mb.i_mb_count;
4515             avg_adj_pow2 /= h->mb.i_mb_count;
4516-            strength = h->param.rc.f_aq_strength * avg_adj;
4517-            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
4518+            strength = h->param.rc.f_aq_strength * avg_adj / bit_depth_correction;
4519+            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f * bit_depth_correction)) / avg_adj;
4520         }
4521         else
4522             strength = h->param.rc.f_aq_strength * 1.0397f;
4523@@ -318,7 +319,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
4524                 else
4525                 {
4526                     uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
4527-                    qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
4528+                    qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8)));
4529                 }
4530                 if( quant_offsets )
4531                     qp_adj += quant_offsets[mb_xy];
4532@@ -620,8 +621,8 @@ int x264_ratecontrol_new( x264_t *h )
4533     rc->ip_offset = 6.0 * log2f( h->param.rc.f_ip_factor );
4534     rc->pb_offset = 6.0 * log2f( h->param.rc.f_pb_factor );
4535     rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
4536-    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
4537-    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
4538+    rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, QP_MAX );
4539+    rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, QP_MAX );
4540     h->mb.ip_offset = rc->ip_offset + 0.5;
4541 
4542     rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
4543@@ -1180,18 +1181,24 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
4544         if( l->level_idc == 41 && h->param.i_nal_hrd )
4545             mincr = 4;
4546 
4547-        /* The spec has a bizarre special case for the first frame. */
4548-        if( h->i_frame == 0 )
4549-        {
4550-            //384 * ( Max( PicSizeInMbs, fR * MaxMBPS ) + MaxMBPS * ( tr( 0 ) - tr,n( 0 ) ) ) / MinCR
4551-            double fr = 1. / 172;
4552-            int pic_size_in_mbs = h->mb.i_mb_width * h->mb.i_mb_height;
4553-            rc->frame_size_maximum = 384 * 8 * X264_MAX( pic_size_in_mbs, fr*l->mbps ) / mincr;
4554-        }
4555+        /* High 10 doesn't require minCR, so just set the maximum to a large value. */
4556+        if( h->sps->i_profile_idc == PROFILE_HIGH10 )
4557+            rc->frame_size_maximum = 1e9;
4558         else
4559         {
4560-            //384 * MaxMBPS * ( tr( n ) - tr( n - 1 ) ) / MinCR
4561-            rc->frame_size_maximum = 384 * 8 * ((double)h->fenc->i_cpb_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale) * l->mbps / mincr;
4562+            /* The spec has a bizarre special case for the first frame. */
4563+            if( h->i_frame == 0 )
4564+            {
4565+                //384 * ( Max( PicSizeInMbs, fR * MaxMBPS ) + MaxMBPS * ( tr( 0 ) - tr,n( 0 ) ) ) / MinCR
4566+                double fr = 1. / 172;
4567+                int pic_size_in_mbs = h->mb.i_mb_width * h->mb.i_mb_height;
4568+                rc->frame_size_maximum = 384 * BIT_DEPTH * X264_MAX( pic_size_in_mbs, fr*l->mbps ) / mincr;
4569+            }
4570+            else
4571+            {
4572+                //384 * MaxMBPS * ( tr( n ) - tr( n - 1 ) ) / MinCR
4573+                rc->frame_size_maximum = 384 * BIT_DEPTH * ((double)h->fenc->i_cpb_duration * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale) * l->mbps / mincr;
4574+            }
4575         }
4576     }
4577 
4578@@ -1231,7 +1238,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
4579 
4580     rc->qpa_rc =
4581     rc->qpa_aq = 0;
4582-    rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
4583+    rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
4584     h->fdec->f_qp_avg_rc =
4585     h->fdec->f_qp_avg_aq =
4586     rc->qpm = q;
4587@@ -1416,9 +1423,9 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
4588              * So just calculate the average QP used so far. */
4589             h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24
4590                                       : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P];
4591-            rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
4592-            rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
4593-            rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
4594+            rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
4595+            rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX );
4596+            rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX );
4597 
4598             x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
4599             x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
4600@@ -2652,7 +2659,7 @@ static int init_pass2( x264_t *h )
4601         }
4602         else if( expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2 )
4603         {
4604-            if( h->param.rc.i_qp_max < 51 )
4605+            if( h->param.rc.i_qp_max < QP_MAX )
4606                 x264_log( h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max );
4607             else
4608                 x264_log( h, X264_LOG_WARNING, "try increasing target bitrate\n");
4609diff --git a/encoder/rdo.c b/encoder/rdo.c
4610index afaa894..4fae811 100644
4611--- a/encoder/rdo.c
4612+++ b/encoder/rdo.c
4613@@ -443,10 +443,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
4614         /* We only need to zero an empty 4x4 block. 8x8 can be
4615            implicitly emptied via zero nnz, as can dc. */
4616         if( i_coefs == 16 && !dc )
4617-        {
4618-            M128( &dct[0] ) = M128_ZERO;
4619-            M128( &dct[8] ) = M128_ZERO;
4620-        }
4621+            memset( dct, 0, 16 * sizeof(dctcoef) );
4622         return 0;
4623     }
4624 
4625@@ -613,10 +610,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
4626     if( bnode == &nodes_cur[0] )
4627     {
4628         if( i_coefs == 16 && !dc )
4629-        {
4630-            M128( &dct[0] ) = M128_ZERO;
4631-            M128( &dct[8] ) = M128_ZERO;
4632-        }
4633+            memset( dct, 0, 16 * sizeof(dctcoef) );
4634         return 0;
4635     }
4636 
4637diff --git a/encoder/set.c b/encoder/set.c
4638index 9e6e736..a520b8a 100644
4639--- a/encoder/set.c
4640+++ b/encoder/set.c
4641@@ -104,6 +104,8 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
4642     sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
4643     if( sps->b_qpprime_y_zero_transform_bypass )
4644         sps->i_profile_idc  = PROFILE_HIGH444_PREDICTIVE;
4645+    else if( BIT_DEPTH > 8 )
4646+        sps->i_profile_idc  = PROFILE_HIGH10;
4647     else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT )
4648         sps->i_profile_idc  = PROFILE_HIGH;
4649     else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 )
4650@@ -260,8 +262,8 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
4651     if( sps->i_profile_idc >= PROFILE_HIGH )
4652     {
4653         bs_write_ue( s, 1 ); // chroma_format_idc = 4:2:0
4654-        bs_write_ue( s, 0 ); // bit_depth_luma_minus8
4655-        bs_write_ue( s, 0 ); // bit_depth_chroma_minus8
4656+        bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
4657+        bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
4658         bs_write( s, 1, sps->b_qpprime_y_zero_transform_bypass );
4659         bs_write( s, 1, 0 ); // seq_scaling_matrix_present_flag
4660     }
4661@@ -488,7 +490,7 @@ void x264_pps_write( bs_t *s, x264_pps_t *pps )
4662     bs_write( s, 1, pps->b_weighted_pred );
4663     bs_write( s, 2, pps->b_weighted_bipred );
4664 
4665-    bs_write_se( s, pps->i_pic_init_qp - 26 );
4666+    bs_write_se( s, pps->i_pic_init_qp - 26 - QP_BD_OFFSET );
4667     bs_write_se( s, pps->i_pic_init_qs - 26 );
4668     bs_write_se( s, pps->i_chroma_qp_index_offset );
4669 
4670@@ -668,7 +670,8 @@ int x264_validate_levels( x264_t *h, int verbose )
4671     int ret = 0;
4672     int mbs = h->sps->i_mb_width * h->sps->i_mb_height;
4673     int dpb = mbs * 384 * h->sps->vui.i_max_dec_frame_buffering;
4674-    int cbp_factor = h->sps->i_profile_idc==PROFILE_HIGH ? 5 : 4;
4675+    int cbp_factor = h->sps->i_profile_idc==PROFILE_HIGH10 ? 12 :
4676+                     h->sps->i_profile_idc==PROFILE_HIGH ? 5 : 4;
4677 
4678     const x264_level_t *l = x264_levels;
4679     while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc )
4680diff --git a/encoder/slicetype.c b/encoder/slicetype.c
4681index 7d69b71..ad2a8c2 100644
4682--- a/encoder/slicetype.c
4683+++ b/encoder/slicetype.c
4684@@ -303,7 +303,7 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
4685                                   (mv1)[0], (mv1)[1], 8, 8, w ); \
4686             h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
4687         } \
4688-        i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
4689+        i_cost = penalty * a->i_lambda + h->pixf.mbcmp[PIXEL_8x8]( \
4690                            m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
4691         COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
4692     }
4693@@ -393,9 +393,9 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
4694             }
4695 
4696             x264_me_search( h, &m[l], mvc, i_mvc );
4697-            m[l].cost -= 2; // remove mvcost from skip mbs
4698+            m[l].cost -= 2 * a->i_lambda; // remove mvcost from skip mbs
4699             if( M32( m[l].mv ) )
4700-                m[l].cost += 5;
4701+                m[l].cost += 5 * a->i_lambda;
4702 
4703 skip_motionest:
4704             CP32( fenc_mvs[l], m[l].mv );
4705@@ -418,7 +418,7 @@ lowres_intra_mb:
4706         ALIGNED_ARRAY_16( pixel, edge,[33] );
4707         pixel *pix = &pix1[8+FDEC_STRIDE - 1];
4708         pixel *src = &fenc->lowres[0][i_pel_offset - 1];
4709-        const int intra_penalty = 5;
4710+        const int intra_penalty = 5 * a->i_lambda;
4711         int satds[3];
4712 
4713         memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
4714@@ -496,7 +496,7 @@ lowres_intra_mb:
4715         }
4716     }
4717 
4718-    fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost + (list_used << LOWRES_COST_SHIFT);
4719+    fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = X264_MIN( i_bcost, LOWRES_COST_MASK ) + (list_used << LOWRES_COST_SHIFT);
4720 }
4721 #undef TRY_BIDIR
4722 
4723diff --git a/tools/checkasm.c b/tools/checkasm.c
4724index 7fa2c0c..a5ffa17 100644
4725--- a/tools/checkasm.c
4726+++ b/tools/checkasm.c
4727@@ -40,8 +40,10 @@
4728 uint8_t *buf1, *buf2;
4729 /* buf3, buf4: used to store output */
4730 uint8_t *buf3, *buf4;
4731-/* pbuf*: point to the same memory as above, just for type convenience */
4732-pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
4733+/* pbuf1, pbuf2: initialised to random pixel data; tests shouldn't write into them. */
4734+pixel *pbuf1, *pbuf2;
4735+/* pbuf3, pbuf4: point to buf3, buf4, just for type convenience */
4736+pixel *pbuf3, *pbuf4;
4737 
4738 int quiet = 0;
4739 
4740@@ -256,11 +258,15 @@ static int check_pixel( int cpu_ref, int cpu_new )
4741         int z = i|(i>>4);
4742         z ^= z>>2;
4743         z ^= z>>1;
4744-        buf3[i] = ~(buf4[i] = -(z&1));
4745+        pbuf4[i] = -(z&1) & PIXEL_MAX;
4746+        pbuf3[i] = ~pbuf4[i] & PIXEL_MAX;
4747     }
4748     // random pattern made of maxed pixel differences, in case an intermediate value overflows
4749     for( int i = 256; i < 0x1000; i++ )
4750-        buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
4751+    {
4752+        pbuf4[i] = -(pbuf1[i&~0x88]&1) & PIXEL_MAX;
4753+        pbuf3[i] = ~(pbuf4[i]) & PIXEL_MAX;
4754+    }
4755 
4756 #define TEST_PIXEL( name, align ) \
4757     ok = 1, used_asm = 0; \
4758@@ -535,22 +541,22 @@ static int check_dct( int cpu_ref, int cpu_new )
4759         used_asm = 1; \
4760         call_c( dct_c.name, t1, pbuf1, pbuf2 ); \
4761         call_a( dct_asm.name, t2, pbuf1, pbuf2 ); \
4762-        if( memcmp( t1, t2, size ) ) \
4763+        if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
4764         { \
4765             ok = 0; \
4766             fprintf( stderr, #name " [FAILED]\n" ); \
4767         } \
4768     }
4769     ok = 1; used_asm = 0;
4770-    TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
4771-    TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
4772-    TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4*2 );
4773-    TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
4774+    TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16 );
4775+    TEST_DCT( sub8x8_dct, dct1, dct2, 16*4 );
4776+    TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4 );
4777+    TEST_DCT( sub16x16_dct, dct1, dct2, 16*16 );
4778     report( "sub_dct4 :" );
4779 
4780     ok = 1; used_asm = 0;
4781-    TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
4782-    TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
4783+    TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64 );
4784+    TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*4 );
4785     report( "sub_dct8 :" );
4786 #undef TEST_DCT
4787 
4788@@ -574,13 +580,13 @@ static int check_dct( int cpu_ref, int cpu_new )
4789     { \
4790         set_func_name( #name ); \
4791         used_asm = 1; \
4792-        memcpy( buf3, buf1, 32*32 * sizeof(pixel) ); \
4793-        memcpy( buf4, buf1, 32*32 * sizeof(pixel) ); \
4794-        memcpy( dct1, src, 512 * sizeof(pixel) ); \
4795-        memcpy( dct2, src, 512 * sizeof(pixel) ); \
4796+        memcpy( pbuf3, pbuf1, 32*32 * sizeof(pixel) ); \
4797+        memcpy( pbuf4, pbuf1, 32*32 * sizeof(pixel) ); \
4798+        memcpy( dct1, src, 256 * sizeof(dctcoef) ); \
4799+        memcpy( dct2, src, 256 * sizeof(dctcoef) ); \
4800         call_c1( dct_c.name, pbuf3, (void*)dct1 ); \
4801         call_a1( dct_asm.name, pbuf4, (void*)dct2 ); \
4802-        if( memcmp( buf3, buf4, 32*32 * sizeof(pixel) ) ) \
4803+        if( memcmp( pbuf3, pbuf4, 32*32 * sizeof(pixel) ) ) \
4804         { \
4805             ok = 0; \
4806             fprintf( stderr, #name " [FAILED]\n" ); \
4807@@ -615,10 +621,10 @@ static int check_dct( int cpu_ref, int cpu_new )
4808                 dct1[0][j] = !i ? (j^j>>1^j>>2^j>>3)&1 ? 4080 : -4080 /* max dc */\
4809                            : i<8 ? (*p++)&1 ? 4080 : -4080 /* max elements */\
4810                            : ((*p++)&0x1fff)-0x1000; /* general case */\
4811-            memcpy( dct2, dct1, 32 );\
4812+            memcpy( dct2, dct1, 16 * sizeof(dctcoef) );\
4813             call_c1( dct_c.name, dct1[0] );\
4814             call_a1( dct_asm.name, dct2[0] );\
4815-            if( memcmp( dct1, dct2, 32 ) )\
4816+            if( memcmp( dct1, dct2, 16 * sizeof(dctcoef) ) )\
4817                 ok = 0;\
4818         }\
4819         call_c2( dct_c.name, dct1[0] );\
4820@@ -658,11 +664,11 @@ static int check_dct( int cpu_ref, int cpu_new )
4821         int nz_a, nz_c; \
4822         set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
4823         used_asm = 1; \
4824-        memcpy( buf3, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4825-        memcpy( buf4, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4826+        memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4827+        memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4828         nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \
4829         nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
4830-        if( memcmp( t1, t2, size*sizeof(dctcoef) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
4831+        if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*sizeof(pixel) ) || nz_c != nz_a ) \
4832         { \
4833             ok = 0; \
4834             fprintf( stderr, #name " [FAILED]\n" ); \
4835@@ -680,8 +686,8 @@ static int check_dct( int cpu_ref, int cpu_new )
4836         used_asm = 1; \
4837         for( int i = 0; i < 2; i++ ) \
4838         { \
4839-            memcpy( buf3, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4840-            memcpy( buf4, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4841+            memcpy( pbuf3, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4842+            memcpy( pbuf4, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4843             for( int j = 0; j < 4; j++ ) \
4844             { \
4845                 memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
4846@@ -689,7 +695,7 @@ static int check_dct( int cpu_ref, int cpu_new )
4847             } \
4848             nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
4849             nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
4850-            if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
4851+            if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
4852             { \
4853                 ok = 0; \
4854                 fprintf( stderr, #name " [FAILED]\n" ); \
4855@@ -779,11 +785,11 @@ static int check_mc( int cpu_ref, int cpu_new )
4856             const x264_weight_t *weight = weight_none; \
4857             set_func_name( "mc_luma_%dx%d", w, h ); \
4858             used_asm = 1; \
4859-            memset( buf3, 0xCD, 1024 ); \
4860-            memset( buf4, 0xCD, 1024 ); \
4861+            for( int i = 0; i < 1024; i++ ) \
4862+                pbuf3[i] = pbuf4[i] = 0xCD; \
4863             call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
4864             call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h, weight ); \
4865-            if( memcmp( buf3, buf4, 1024 ) ) \
4866+            if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
4867             { \
4868                 fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h ); \
4869                 ok = 0; \
4870@@ -796,8 +802,8 @@ static int check_mc( int cpu_ref, int cpu_new )
4871             const x264_weight_t *weight = weight_none; \
4872             set_func_name( "get_ref_%dx%d", w, h ); \
4873             used_asm = 1; \
4874-            memset( buf3, 0xCD, 1024 ); \
4875-            memset( buf4, 0xCD, 1024 ); \
4876+            for( int i = 0; i < 1024; i++ ) \
4877+                pbuf3[i] = pbuf4[i] = 0xCD; \
4878             call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
4879             ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
4880             for( int i = 0; i < h; i++ ) \
4881@@ -814,15 +820,15 @@ static int check_mc( int cpu_ref, int cpu_new )
4882         { \
4883             set_func_name( "mc_chroma_%dx%d", w, h ); \
4884             used_asm = 1; \
4885-            memset( buf3, 0xCD, 1024 ); \
4886-            memset( buf4, 0xCD, 1024 ); \
4887+            for( int i = 0; i < 1024; i++ ) \
4888+                pbuf3[i] = pbuf4[i] = 0xCD; \
4889             call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
4890             call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
4891             /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
4892             for( int j = 0; j < h; j++ ) \
4893                 for( int i = w; i < 4; i++ ) \
4894                     dst2[i+j*16] = dst1[i+j*16]; \
4895-            if( memcmp( buf3, buf4, 1024 ) ) \
4896+            if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
4897             { \
4898                 fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d]     [FAILED]\n", dx, dy, w, h ); \
4899                 ok = 0; \
4900@@ -867,15 +873,15 @@ static int check_mc( int cpu_ref, int cpu_new )
4901     ok = 1, used_asm = 0; \
4902     for( int i = 0; i < 10; i++ ) \
4903     { \
4904-        memcpy( buf3, pbuf1+320, 320 * sizeof(pixel) ); \
4905-        memcpy( buf4, pbuf1+320, 320 * sizeof(pixel) ); \
4906+        memcpy( pbuf3, pbuf1+320, 320 * sizeof(pixel) ); \
4907+        memcpy( pbuf4, pbuf1+320, 320 * sizeof(pixel) ); \
4908         if( mc_a.name[i] != mc_ref.name[i] ) \
4909         { \
4910             set_func_name( "%s_%s", #name, pixel_names[i] ); \
4911             used_asm = 1; \
4912             call_c1( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
4913             call_a1( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
4914-            if( memcmp( buf3, buf4, 320 * sizeof(pixel) ) ) \
4915+            if( memcmp( pbuf3, pbuf4, 320 * sizeof(pixel) ) ) \
4916             { \
4917                 ok = 0; \
4918                 fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
4919@@ -971,8 +977,8 @@ static int check_mc( int cpu_ref, int cpu_new )
4920         void *tmp = pbuf3+49*64;
4921         set_func_name( "hpel_filter" );
4922         ok = 1; used_asm = 1;
4923-        memset( buf3, 0, 4096 * sizeof(pixel) );
4924-        memset( buf4, 0, 4096 * sizeof(pixel) );
4925+        memset( pbuf3, 0, 4096 * sizeof(pixel) );
4926+        memset( pbuf4, 0, 4096 * sizeof(pixel) );
4927         call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, 64, 48, 10, tmp );
4928         call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, 64, 48, 10, tmp );
4929         for( int i = 0; i < 3; i++ )
4930@@ -1030,13 +1036,13 @@ static int check_mc( int cpu_ref, int cpu_new )
4931         int stride = 80;\
4932         set_func_name( #name );\
4933         used_asm = 1;\
4934-        memcpy( buf3, buf1, size*2*stride * sizeof(pixel) );\
4935-        memcpy( buf4, buf1, size*2*stride * sizeof(pixel) );\
4936-        uint16_t *sum = (uint16_t*)buf3;\
4937+        memcpy( pbuf3, pbuf1, size*2*stride * sizeof(pixel) );\
4938+        memcpy( pbuf4, pbuf1, size*2*stride * sizeof(pixel) );\
4939+        uint16_t *sum = (uint16_t*)pbuf3;\
4940         call_c1( mc_c.name, __VA_ARGS__ );\
4941-        sum = (uint16_t*)buf4;\
4942+        sum = (uint16_t*)pbuf4;\
4943         call_a1( mc_a.name, __VA_ARGS__ );\
4944-        if( memcmp( buf3, buf4, (stride-8)*2 * sizeof(pixel) )\
4945+        if( memcmp( pbuf3, pbuf4, (stride-8)*2 * sizeof(pixel) )\
4946             || (size>9 && memcmp( pbuf3+18*stride, pbuf4+18*stride, (stride-8)*2 * sizeof(pixel) )))\
4947             ok = 0;\
4948         call_c2( mc_c.name, __VA_ARGS__ );\
4949@@ -1096,11 +1102,11 @@ static int check_deblock( int cpu_ref, int cpu_new )
4950     /* not exactly the real values of a,b,tc but close enough */
4951     for( int i = 35, a = 255, c = 250; i >= 0; i-- )
4952     {
4953-        alphas[i] = a;
4954-        betas[i] = (i+1)/2;
4955-        tcs[i][0] = tcs[i][3] = (c+6)/10;
4956-        tcs[i][1] = (c+7)/15;
4957-        tcs[i][2] = (c+9)/20;
4958+        alphas[i] = a << (BIT_DEPTH-8);
4959+        betas[i] = (i+1)/2 << (BIT_DEPTH-8);
4960+        tcs[i][0] = tcs[i][3] = (c+6)/10 << (BIT_DEPTH-8);
4961+        tcs[i][1] = (c+7)/15 << (BIT_DEPTH-8);
4962+        tcs[i][2] = (c+9)/20 << (BIT_DEPTH-8);
4963         a = a*9/10;
4964         c = c*9/10;
4965     }
4966@@ -1111,15 +1117,15 @@ static int check_deblock( int cpu_ref, int cpu_new )
4967         int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
4968         for( int j = 0; j < 1024; j++ ) \
4969             /* two distributions of random to excersize different failure modes */ \
4970-            buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
4971-        memcpy( buf4, buf3, 1024 * sizeof(pixel) ); \
4972+            pbuf3[j] = rand() & (i&1 ? 0xf : PIXEL_MAX ); \
4973+        memcpy( pbuf4, pbuf3, 1024 * sizeof(pixel) ); \
4974         if( db_a.name != db_ref.name ) \
4975         { \
4976             set_func_name( #name ); \
4977             used_asm = 1; \
4978             call_c1( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
4979             call_a1( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
4980-            if( memcmp( buf3, buf4, 1024 * sizeof(pixel) ) ) \
4981+            if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
4982             { \
4983                 ok = 0; \
4984                 fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
4985@@ -1200,7 +1206,7 @@ static int check_quant( int cpu_ref, int cpu_new )
4986     h->pps = h->pps_array;
4987     x264_param_default( &h->param );
4988     h->chroma_qp_table = i_chroma_qp_table + 12;
4989-    h->param.rc.i_qp_min = 26;
4990+    h->param.rc.i_qp_min = 26 + QP_BD_OFFSET;
4991     h->param.analyse.b_transform_8x8 = 1;
4992 
4993     for( int i_cqm = 0; i_cqm < 4; i_cqm++ )
4994@@ -1219,9 +1225,10 @@ static int check_quant( int cpu_ref, int cpu_new )
4995         }
4996         else
4997         {
4998+            int max_scale = BIT_DEPTH < 10 ? 255 : 228;
4999             if( i_cqm == 2 )
5000                 for( int i = 0; i < 64; i++ )
5001-                    cqm_buf[i] = 10 + rand() % 246;
5002+                    cqm_buf[i] = 10 + rand() % (max_scale - 9);
5003             else
5004                 for( int i = 0; i < 64; i++ )
5005                     cqm_buf[i] = 1;
5006@@ -1260,7 +1267,7 @@ static int check_quant( int cpu_ref, int cpu_new )
5007         { \
5008             set_func_name( #name ); \
5009             used_asms[0] = 1; \
5010-            for( int qp = 51; qp > 0; qp-- ) \
5011+            for( int qp = QP_MAX; qp > 0; qp-- ) \
5012             { \
5013                 for( int j = 0; j < 2; j++ ) \
5014                 { \
5015@@ -1269,7 +1276,7 @@ static int check_quant( int cpu_ref, int cpu_new )
5016                         dct1[i] = dct2[i] = j ? (rand() & 0x1fff) - 0xfff : 0; \
5017                     result_c = call_c1( qf_c.name, dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
5018                     result_a = call_a1( qf_a.name, dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
5019-                    if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
5020+                    if( memcmp( dct1, dct2, 16*sizeof(dctcoef) ) || result_c != result_a ) \
5021                     { \
5022                         oks[0] = 0; \
5023                         fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
5024@@ -1286,14 +1293,14 @@ static int check_quant( int cpu_ref, int cpu_new )
5025         { \
5026             set_func_name( #qname ); \
5027             used_asms[0] = 1; \
5028-            for( int qp = 51; qp > 0; qp-- ) \
5029+            for( int qp = QP_MAX; qp > 0; qp-- ) \
5030             { \
5031                 for( int j = 0; j < 2; j++ ) \
5032                 { \
5033                     INIT_QUANT##w(j) \
5034                     int result_c = call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
5035                     int result_a = call_a1( qf_a.qname, dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
5036-                    if( memcmp( dct1, dct2, w*w*2 ) || result_c != result_a ) \
5037+                    if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) || result_c != result_a ) \
5038                     { \
5039                         oks[0] = 0; \
5040                         fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
5041@@ -1317,14 +1324,14 @@ static int check_quant( int cpu_ref, int cpu_new )
5042         { \
5043             set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
5044             used_asms[1] = 1; \
5045-            for( int qp = 51; qp > 0; qp-- ) \
5046+            for( int qp = QP_MAX; qp > 0; qp-- ) \
5047             { \
5048                 INIT_QUANT##w(1) \
5049                 call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
5050-                memcpy( dct2, dct1, w*w*2 ); \
5051+                memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
5052                 call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
5053                 call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
5054-                if( memcmp( dct1, dct2, w*w*2 ) ) \
5055+                if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
5056                 { \
5057                     oks[1] = 0; \
5058                     fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
5059@@ -1345,15 +1352,15 @@ static int check_quant( int cpu_ref, int cpu_new )
5060         { \
5061             set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
5062             used_asms[1] = 1; \
5063-            for( int qp = 51; qp > 0; qp-- ) \
5064+            for( int qp = QP_MAX; qp > 0; qp-- ) \
5065             { \
5066                 for( int i = 0; i < 16; i++ ) \
5067                     dct1[i] = rand(); \
5068                 call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp][0]>>1, h->quant##w##_bias[block][qp][0]>>1 ); \
5069-                memcpy( dct2, dct1, w*w*2 ); \
5070+                memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
5071                 call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
5072                 call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
5073-                if( memcmp( dct1, dct2, w*w*2 ) ) \
5074+                if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
5075                 { \
5076                     oks[1] = 0; \
5077                     fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
5078@@ -1381,12 +1388,12 @@ static int check_quant( int cpu_ref, int cpu_new )
5079         for( int size = 16; size <= 64; size += 48 )
5080         {
5081             set_func_name( "denoise_dct" );
5082-            memcpy( dct1, buf1, size*2 );
5083-            memcpy( dct2, buf1, size*2 );
5084+            memcpy( dct1, buf1, size*sizeof(dctcoef) );
5085+            memcpy( dct2, buf1, size*sizeof(dctcoef) );
5086             memcpy( buf3+256, buf3, 256 );
5087             call_c1( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
5088             call_a1( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
5089-            if( memcmp( dct1, dct2, size*2 ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
5090+            if( memcmp( dct1, dct2, size*sizeof(dctcoef) ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
5091                 ok = 0;
5092             call_c2( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
5093             call_a2( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
5094@@ -1431,7 +1438,7 @@ static int check_quant( int cpu_ref, int cpu_new )
5095         { \
5096             int nnz = 0; \
5097             int max = rand() & (w*w-1); \
5098-            memset( dct1, 0, w*w*2 ); \
5099+            memset( dct1, 0, w*w*sizeof(dctcoef) ); \
5100             for( int idx = ac; idx < max; idx++ ) \
5101                 nnz |= dct1[idx] = !(rand()&3) + (!(rand()&15))*rand(); \
5102             if( !nnz ) \
5103@@ -1464,7 +1471,7 @@ static int check_quant( int cpu_ref, int cpu_new )
5104             x264_run_level_t runlevel_c, runlevel_a; \
5105             int nnz = 0; \
5106             int max = rand() & (w*w-1); \
5107-            memset( dct1, 0, w*w*2 ); \
5108+            memset( dct1, 0, w*w*sizeof(dctcoef) ); \
5109             memcpy( &runlevel_a, buf1+i, sizeof(x264_run_level_t) ); \
5110             memcpy( &runlevel_c, buf1+i, sizeof(x264_run_level_t) ); \
5111             for( int idx = ac; idx < max; idx++ ) \
5112@@ -1474,7 +1481,7 @@ static int check_quant( int cpu_ref, int cpu_new )
5113             int result_c = call_c( qf_c.lastname, dct1+ac, &runlevel_c ); \
5114             int result_a = call_a( qf_a.lastname, dct1+ac, &runlevel_a ); \
5115             if( result_c != result_a || runlevel_c.last != runlevel_a.last || \
5116-                memcmp(runlevel_c.level, runlevel_a.level, sizeof(int16_t)*result_c) || \
5117+                memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c) || \
5118                 memcmp(runlevel_c.run, runlevel_a.run, sizeof(uint8_t)*(result_c-1)) ) \
5119             { \
5120                 ok = 0; \
5121@@ -1529,11 +1536,11 @@ static int check_intra( int cpu_ref, int cpu_new )
5122     {\
5123         set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
5124         used_asm = 1;\
5125-        memcpy( buf3, buf1, 32*20 * sizeof(pixel) );\
5126-        memcpy( buf4, buf1, 32*20 * sizeof(pixel) );\
5127+        memcpy( pbuf3, pbuf1, 32*20 * sizeof(pixel) );\
5128+        memcpy( pbuf4, pbuf1, 32*20 * sizeof(pixel) );\
5129         call_c( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\
5130         call_a( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\
5131-        if( memcmp( buf3, buf4, 32*20 * sizeof(pixel) ) )\
5132+        if( memcmp( pbuf3, pbuf4, 32*20 * sizeof(pixel) ) )\
5133         {\
5134             fprintf( stderr, #name "[%d] :  [FAILED]\n", dir );\
5135             ok = 0;\
5136@@ -1544,7 +1551,7 @@ static int check_intra( int cpu_ref, int cpu_new )
5137             {\
5138                 printf( "%2x ", edge[14-j] );\
5139                 for( int k = 0; k < w; k++ )\
5140-                    printf( "%2x ", buf4[48+k+j*32] );\
5141+                    printf( "%2x ", pbuf4[48+k+j*32] );\
5142                 printf( "\n" );\
5143             }\
5144             printf( "\n" );\
5145@@ -1552,7 +1559,7 @@ static int check_intra( int cpu_ref, int cpu_new )
5146             {\
5147                 printf( "   " );\
5148                 for( int k = 0; k < w; k++ )\
5149-                    printf( "%2x ", buf3[48+k+j*32] );\
5150+                    printf( "%2x ", pbuf3[48+k+j*32] );\
5151                 printf( "\n" );\
5152             }\
5153         }\
5154@@ -1831,8 +1838,9 @@ int main(int argc, char *argv[])
5155     fprintf( stderr, "x264: using random seed %u\n", seed );
5156     srand( seed );
5157 
5158-    buf1 = x264_malloc( 0x3e00 + 16*BENCH_ALIGNS );
5159-    if( !buf1 )
5160+    buf1 = x264_malloc( 0x1e00 + 0x2000*sizeof(pixel) + 16*BENCH_ALIGNS );
5161+    pbuf1 = x264_malloc( 0x1e00*sizeof(pixel) + 16*BENCH_ALIGNS );
5162+    if( !buf1 || !pbuf1 )
5163     {
5164         fprintf( stderr, "malloc failed, unable to initiate tests!\n" );
5165         return -1;
5166@@ -1840,15 +1848,17 @@ int main(int argc, char *argv[])
5167 #define INIT_POINTER_OFFSETS\
5168     buf2 = buf1 + 0xf00;\
5169     buf3 = buf2 + 0xf00;\
5170-    buf4 = buf3 + 0x1000;\
5171-    pbuf1 = (pixel*)buf1;\
5172-    pbuf2 = (pixel*)buf2;\
5173+    buf4 = buf3 + 0x1000*sizeof(pixel);\
5174+    pbuf2 = pbuf1 + 0xf00;\
5175     pbuf3 = (pixel*)buf3;\
5176     pbuf4 = (pixel*)buf4;
5177     INIT_POINTER_OFFSETS;
5178     for( int i = 0; i < 0x1e00; i++ )
5179+    {
5180         buf1[i] = rand() & 0xFF;
5181-    memset( buf1+0x1e00, 0, 0x2000 );
5182+        pbuf1[i] = rand() & PIXEL_MAX;
5183+    }
5184+    memset( buf1+0x1e00, 0, 0x2000*sizeof(pixel) );
5185 
5186     /* 16-byte alignment is guaranteed whenever it's useful, but some functions also vary in speed depending on %64 */
5187     if( do_bench )
5188@@ -1857,6 +1867,7 @@ int main(int argc, char *argv[])
5189             INIT_POINTER_OFFSETS;
5190             ret |= x264_stack_pagealign( check_all_flags, i*16 );
5191             buf1 += 16;
5192+            pbuf1 += 16;
5193             quiet = 1;
5194             fprintf( stderr, "%d/%d\r", i+1, BENCH_ALIGNS );
5195         }
5196diff --git a/x264.c b/x264.c
5197index 0bede93..e6d27d1 100644
5198--- a/x264.c
5199+++ b/x264.c
5200@@ -262,6 +262,7 @@ static void Help( x264_param_t *defaults, int longhelp )
5201         " .mkv -> Matroska\n"
5202         " .flv -> Flash Video\n"
5203         " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
5204+        "Output bit depth: %d (configured at compile time)\n"
5205         "\n"
5206         "Options:\n"
5207         "\n"
5208@@ -286,10 +287,11 @@ static void Help( x264_param_t *defaults, int longhelp )
5209         "no",
5210 #endif
5211 #if HAVE_GPAC
5212-        "yes"
5213+        "yes",
5214 #else
5215-        "no"
5216+        "no",
5217 #endif
5218+        BIT_DEPTH
5219       );
5220     H0( "Example usage:\n" );
5221     H0( "\n" );
5222@@ -311,7 +313,7 @@ static void Help( x264_param_t *defaults, int longhelp )
5223     H0( "\n" );
5224     H0( "Presets:\n" );
5225     H0( "\n" );
5226-    H0( "      --profile               Force the limits of an H.264 profile [high]\n"
5227+    H0( "      --profile               Force the limits of an H.264 profile\n"
5228         "                                  Overrides all settings.\n" );
5229     H2( "                                  - baseline:\n"
5230         "                                    --no-8x8dct --bframes 0 --no-cabac\n"
5231@@ -322,8 +324,11 @@ static void Help( x264_param_t *defaults, int longhelp )
5232         "                                    --no-8x8dct --cqm flat\n"
5233         "                                    No lossless.\n"
5234         "                                  - high:\n"
5235-        "                                    No lossless.\n" );
5236-        else H0( "                                  - baseline,main,high\n" );
5237+        "                                    No lossless.\n"
5238+        "                                  - high10:\n"
5239+        "                                    No lossless.\n"
5240+        "                                    Support for bit depth 8-10.\n" );
5241+        else H0( "                                  - baseline,main,high,high10\n" );
5242     H0( "      --preset                Use a preset to select encoding settings [medium]\n"
5243         "                                  Overridden by user settings.\n" );
5244     H2( "                                  - ultrafast:\n"
5245@@ -453,9 +458,9 @@ static void Help( x264_param_t *defaults, int longhelp )
5246     H0( "\n" );
5247     H0( "Ratecontrol:\n" );
5248     H0( "\n" );
5249-    H1( "  -q, --qp <integer>          Force constant QP (0-51, 0=lossless)\n" );
5250+    H1( "  -q, --qp <integer>          Force constant QP (0-%d, 0=lossless)\n", QP_MAX );
5251     H0( "  -B, --bitrate <integer>     Set bitrate (kbit/s)\n" );
5252-    H0( "      --crf <float>           Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
5253+    H0( "      --crf <float>           Quality-based VBR (0-%d, 0=lossless) [%.1f]\n", QP_MAX, defaults->rc.f_rf_constant );
5254     H1( "      --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
5255     H0( "      --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
5256     H0( "      --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
5257@@ -1040,6 +1045,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
5258 #else
5259                 printf( "using a non-gcc compiler\n" );
5260 #endif
5261+                printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
5262                 exit(0);
5263             case OPT_FRAMES:
5264                 param->i_frame_total = X264_MAX( atoi( optarg ), 0 );
5265@@ -1318,7 +1324,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
5266         else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
5267         else if( type == 'b' ) pic->i_type = X264_TYPE_B;
5268         else ret = 0;
5269-        if( ret != 3 || qp < -1 || qp > 51 )
5270+        if( ret != 3 || qp < -1 || qp > QP_MAX )
5271         {
5272             x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
5273             fclose( opt->qpfile );
5274diff --git a/x264.h b/x264.h
5275index 097365a..4d9b9ca 100644
5276--- a/x264.h
5277+++ b/x264.h
5278@@ -344,7 +344,7 @@ typedef struct x264_param_t
5279     {
5280         int         i_rc_method;    /* X264_RC_* */
5281 
5282-        int         i_qp_constant;  /* 0-51 */
5283+        int         i_qp_constant;  /* 0 to (51 + 6*(BIT_DEPTH-8)) */
5284         int         i_qp_min;       /* min allowed QP value */
5285         int         i_qp_max;       /* max allowed QP value */
5286         int         i_qp_step;      /* max QP step between frames */
5287@@ -550,7 +550,7 @@ void    x264_param_apply_fastfirstpass( x264_param_t * );
5288 /* x264_param_apply_profile:
5289  *      Applies the restrictions of the given profile.
5290  *      Currently available profiles are, from most to least restrictive: */
5291-static const char * const x264_profile_names[] = { "baseline", "main", "high", 0 };
5292+static const char * const x264_profile_names[] = { "baseline", "main", "high", "high10", 0 };
5293 
5294 /*      (can be NULL, in which case the function will do nothing)
5295  *
5296-- 
52971.7.1