· 8 years ago · Jun 03, 2017, 11:26 AM
1From aa1a8435000228c4d9e74da0f9fd3d16e85a3e80 Mon Sep 17 00:00:00 2001
2From: Loren Merritt <pengvado@akuvian.org>
3Date: Sat, 26 Jun 2010 20:55:59 -0700
4Subject: [PATCH 1/7] Simplify pixel_ads
5
6---
7 common/macroblock.c | 2 +-
8 common/x86/pixel-a.asm | 175 +++++++++++++++++------------------------------
9 encoder/me.c | 2 +-
10 3 files changed, 65 insertions(+), 114 deletions(-)
11
12diff --git a/common/macroblock.c b/common/macroblock.c
13index 8e9b06d..4561d8a 100644
14--- a/common/macroblock.c
15+++ b/common/macroblock.c
16@@ -341,7 +341,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
17 int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
18 int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
19 int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
20- ((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
21+ ((me_range*2+24) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
22 scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
23 }
24 int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+3)&~3) * sizeof(int);
25diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm
26index 78ca4c7..1756f86 100644
27--- a/common/x86/pixel-a.asm
28+++ b/common/x86/pixel-a.asm
29@@ -2142,34 +2142,24 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
30 ; Successive Elimination ADS
31 ;=============================================================================
32
33-%macro ADS_START 1 ; unroll_size
34-%ifdef ARCH_X86_64
35- %define t0 r6
36+%macro ADS_START 0
37 %ifdef WIN64
38- mov r4, r4mp
39- movsxd r5, dword r5m
40+ movsxd r5, r5d
41 %endif
42- mov r10, rsp
43-%else
44- %define t0 r4
45- mov rbp, rsp
46-%endif
47- mov r0d, r5m
48- sub rsp, r0
49- sub rsp, %1*4-1
50- and rsp, ~15
51- mov t0, rsp
52+ mov r0d, r5d
53+ lea r6, [r4+r5+15]
54+ and r6, ~15;
55 shl r2d, 1
56 %endmacro
57
58-%macro ADS_END 1
59+%macro ADS_END 1 ; unroll_size
60 add r1, 8*%1
61 add r3, 8*%1
62- add t0, 4*%1
63+ add r6, 4*%1
64 sub r0d, 4*%1
65 jg .loop
66 %ifdef WIN64
67- RESTORE_XMM r10
68+ RESTORE_XMM rsp
69 %endif
70 jmp ads_mvs
71 %endmacro
72@@ -2180,14 +2170,14 @@ cglobal pixel_ssim_end4_sse2, 3,3,7
73 ; int pixel_ads4( int enc_dc[4], uint16_t *sums, int delta,
74 ; uint16_t *cost_mvx, int16_t *mvs, int width, int thresh )
75 ;-----------------------------------------------------------------------------
76-cglobal pixel_ads4_mmxext, 4,7
77+cglobal pixel_ads4_mmxext, 6,7
78 movq mm6, [r0]
79 movq mm4, [r0+8]
80 pshufw mm7, mm6, 0
81 pshufw mm6, mm6, 0xAA
82 pshufw mm5, mm4, 0
83 pshufw mm4, mm4, 0xAA
84- ADS_START 1
85+ ADS_START
86 .loop:
87 movq mm0, [r1]
88 movq mm1, [r1+16]
89@@ -2204,25 +2194,19 @@ cglobal pixel_ads4_mmxext, 4,7
90 ABS1 mm3, mm1
91 paddw mm0, mm2
92 paddw mm0, mm3
93-%ifdef WIN64
94- pshufw mm1, [r10+stack_offset+56], 0
95-%elifdef ARCH_X86_64
96- pshufw mm1, [r10+8], 0
97-%else
98- pshufw mm1, [ebp+stack_offset+28], 0
99-%endif
100+ pshufw mm1, r6m, 0
101 paddusw mm0, [r3]
102 psubusw mm1, mm0
103 packsswb mm1, mm1
104- movd [t0], mm1
105+ movd [r6], mm1
106 ADS_END 1
107
108-cglobal pixel_ads2_mmxext, 4,7
109+cglobal pixel_ads2_mmxext, 6,7
110 movq mm6, [r0]
111 pshufw mm5, r6m, 0
112 pshufw mm7, mm6, 0
113 pshufw mm6, mm6, 0xAA
114- ADS_START 1
115+ ADS_START
116 .loop:
117 movq mm0, [r1]
118 movq mm1, [r1+r2]
119@@ -2235,13 +2219,13 @@ cglobal pixel_ads2_mmxext, 4,7
120 movq mm4, mm5
121 psubusw mm4, mm0
122 packsswb mm4, mm4
123- movd [t0], mm4
124+ movd [r6], mm4
125 ADS_END 1
126
127-cglobal pixel_ads1_mmxext, 4,7
128+cglobal pixel_ads1_mmxext, 6,7
129 pshufw mm7, [r0], 0
130 pshufw mm6, r6m, 0
131- ADS_START 2
132+ ADS_START
133 .loop:
134 movq mm0, [r1]
135 movq mm1, [r1+8]
136@@ -2256,11 +2240,11 @@ cglobal pixel_ads1_mmxext, 4,7
137 psubusw mm4, mm0
138 psubusw mm5, mm1
139 packsswb mm4, mm5
140- movq [t0], mm4
141+ movq [r6], mm4
142 ADS_END 2
143
144 %macro ADS_SSE2 1
145-cglobal pixel_ads4_%1, 4,7,12
146+cglobal pixel_ads4_%1, 6,7,12
147 movdqa xmm4, [r0]
148 pshuflw xmm7, xmm4, 0
149 pshuflw xmm6, xmm4, 0xAA
150@@ -2273,7 +2257,7 @@ cglobal pixel_ads4_%1, 4,7,12
151 %ifdef ARCH_X86_64
152 pshuflw xmm8, r6m, 0
153 punpcklqdq xmm8, xmm8
154- ADS_START 2
155+ ADS_START
156 movdqu xmm10, [r1]
157 movdqu xmm11, [r1+r2]
158 .loop:
159@@ -2299,9 +2283,9 @@ cglobal pixel_ads4_%1, 4,7,12
160 movdqa xmm1, xmm8
161 psubusw xmm1, xmm0
162 packsswb xmm1, xmm1
163- movq [t0], xmm1
164+ movq [r6], xmm1
165 %else
166- ADS_START 2
167+ ADS_START
168 .loop:
169 movdqu xmm0, [r1]
170 movdqu xmm1, [r1+16]
171@@ -2318,18 +2302,18 @@ cglobal pixel_ads4_%1, 4,7,12
172 ABS1 xmm3, xmm1
173 paddw xmm0, xmm2
174 paddw xmm0, xmm3
175- movd xmm1, [ebp+stack_offset+28]
176+ movd xmm1, r6m
177 movdqu xmm2, [r3]
178 pshuflw xmm1, xmm1, 0
179 punpcklqdq xmm1, xmm1
180 paddusw xmm0, xmm2
181 psubusw xmm1, xmm0
182 packsswb xmm1, xmm1
183- movq [t0], xmm1
184+ movq [r6], xmm1
185 %endif ; ARCH
186 ADS_END 2
187
188-cglobal pixel_ads2_%1, 4,7,8
189+cglobal pixel_ads2_%1, 6,7,8
190 movq xmm6, [r0]
191 movd xmm5, r6m
192 pshuflw xmm7, xmm6, 0
193@@ -2338,7 +2322,7 @@ cglobal pixel_ads2_%1, 4,7,8
194 punpcklqdq xmm7, xmm7
195 punpcklqdq xmm6, xmm6
196 punpcklqdq xmm5, xmm5
197- ADS_START 2
198+ ADS_START
199 .loop:
200 movdqu xmm0, [r1]
201 movdqu xmm1, [r1+r2]
202@@ -2352,17 +2336,17 @@ cglobal pixel_ads2_%1, 4,7,8
203 movdqa xmm1, xmm5
204 psubusw xmm1, xmm0
205 packsswb xmm1, xmm1
206- movq [t0], xmm1
207+ movq [r6], xmm1
208 ADS_END 2
209
210-cglobal pixel_ads1_%1, 4,7,8
211+cglobal pixel_ads1_%1, 6,7,8
212 movd xmm7, [r0]
213 movd xmm6, r6m
214 pshuflw xmm7, xmm7, 0
215 pshuflw xmm6, xmm6, 0
216 punpcklqdq xmm7, xmm7
217 punpcklqdq xmm6, xmm6
218- ADS_START 4
219+ ADS_START
220 .loop:
221 movdqu xmm0, [r1]
222 movdqu xmm1, [r1+16]
223@@ -2379,7 +2363,7 @@ cglobal pixel_ads1_%1, 4,7,8
224 psubusw xmm4, xmm0
225 psubusw xmm5, xmm1
226 packsswb xmm4, xmm5
227- movdqa [t0], xmm4
228+ movdqa [r6], xmm4
229 ADS_END 4
230 %endmacro
231
232@@ -2401,90 +2385,57 @@ ADS_SSE2 ssse3
233 ; }
234 ; return nmv;
235 ; }
236+
237+%macro TEST 1
238+ mov [r4+r0*2], r1w
239+ test r2d, 0xff<<(%1*8)
240+ setne r3b
241+ add r0d, r3d
242+ inc r1d
243+%endmacro
244+
245 cglobal pixel_ads_mvs, 0,7,0
246 ads_mvs:
247-%ifdef ARCH_X86_64
248+ lea r6, [r4+r5+15]
249+ and r6, ~15;
250 ; mvs = r4
251- ; masks = rsp
252+ ; masks = r6
253 ; width = r5
254 ; clear last block in case width isn't divisible by 8. (assume divisible by 4, so clearing 4 bytes is enough.)
255-%ifdef WIN64
256- mov r8, r4
257- mov r9, r5
258-%endif
259- xor eax, eax
260- xor esi, esi
261- mov dword [rsp+r9], 0
262+ xor r0d, r0d
263+ xor r1d, r1d
264+ mov [r6+r5], r0d
265 jmp .loopi
266+ALIGN 16
267 .loopi0:
268- add esi, 8
269- cmp esi, r9d
270+ add r1d, 8
271+ cmp r1d, r5d
272 jge .end
273 .loopi:
274- mov rdi, [rsp+rsi]
275- test rdi, rdi
276+ mov r2, [r6+r1]
277+%ifdef ARCH_X86_64
278+ test r2, r2
279+%else
280+ mov r3, r2
281+ or r3d, [r6+r1+4]
282+%endif
283 jz .loopi0
284- xor ecx, ecx
285-%macro TEST 1
286- mov [r8+rax*2], si
287- test edi, 0xff<<(%1*8)
288- setne cl
289- add eax, ecx
290- inc esi
291-%endmacro
292+ xor r3d, r3d
293 TEST 0
294 TEST 1
295 TEST 2
296 TEST 3
297- shr rdi, 32
298+%ifdef ARCH_X86_64
299+ shr r2, 32
300+%else
301+ mov r2d, [r6+r1]
302+%endif
303 TEST 0
304 TEST 1
305 TEST 2
306 TEST 3
307- cmp esi, r9d
308- jl .loopi
309-.end:
310- mov rsp, r10
311- RET
312-
313-%else
314- xor eax, eax
315- xor esi, esi
316- mov ebx, [ebp+stack_offset+20] ; mvs
317- mov edi, [ebp+stack_offset+24] ; width
318- mov dword [esp+edi], 0
319- push ebp
320- jmp .loopi
321-.loopi0:
322- add esi, 8
323- cmp esi, edi
324- jge .end
325-.loopi:
326- mov ebp, [esp+esi+4]
327- mov edx, [esp+esi+8]
328- mov ecx, ebp
329- or ecx, edx
330- jz .loopi0
331- xor ecx, ecx
332-%macro TEST 2
333- mov [ebx+eax*2], si
334- test %2, 0xff<<(%1*8)
335- setne cl
336- add eax, ecx
337- inc esi
338-%endmacro
339- TEST 0, ebp
340- TEST 1, ebp
341- TEST 2, ebp
342- TEST 3, ebp
343- TEST 0, edx
344- TEST 1, edx
345- TEST 2, edx
346- TEST 3, edx
347- cmp esi, edi
348+ cmp r1d, r5d
349 jl .loopi
350 .end:
351- pop esp
352+ movifnidn eax, r0d
353 RET
354-%endif ; ARCH
355-
356diff --git a/encoder/me.c b/encoder/me.c
357index 291104a..19c5b2b 100644
358--- a/encoder/me.c
359+++ b/encoder/me.c
360@@ -609,7 +609,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
361 if( h->mb.i_me_method == X264_ME_TESA )
362 {
363 // ADS threshold, then SAD threshold, then keep the best few SADs, then SATD
364- mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15));
365+ mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15) + 4);
366 int nmvsad = 0, limit;
367 int sad_thresh = i_me_range <= 16 ? 10 : i_me_range <= 24 ? 11 : 12;
368 int bsad = h->pixf.sad[i_pixel]( p_fenc, FENC_STRIDE, p_fref_w+bmy*stride+bmx, stride )
369--
3701.7.1
371
372
373From 4f74306c2f266bfc671ad99e9027b816dd423ece Mon Sep 17 00:00:00 2001
374From: Jason Garrett-Glaser <darkshikari@gmail.com>
375Date: Mon, 28 Jun 2010 15:02:33 -0700
376Subject: [PATCH 2/7] Callback feature for low-latency per-slice output
377 Add a callback to allow the calling application to send slices immediately after being encoded.
378 Also add some extra information to the x264_nal_t structure to help inform such a calling application how the NAL units should be ordered.
379
380Full documentation is in x264.h.
381---
382 common/bitstream.c | 7 ++-
383 common/bitstream.h | 1 -
384 encoder/encoder.c | 26 ++++++++---
385 x264.h | 128 +++++++++++++++++++++++++++++++++-------------------
386 4 files changed, 105 insertions(+), 57 deletions(-)
387
388diff --git a/common/bitstream.c b/common/bitstream.c
389index 0aaac21..ad8c16e 100644
390--- a/common/bitstream.c
391+++ b/common/bitstream.c
392@@ -44,7 +44,7 @@ uint8_t *x264_nal_escape_sse2( uint8_t *dst, uint8_t *src, uint8_t *end );
393 /****************************************************************************
394 * x264_nal_encode:
395 ****************************************************************************/
396-int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode )
397+void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
398 {
399 uint8_t *src = nal->p_payload;
400 uint8_t *end = nal->p_payload + nal->i_payload;
401@@ -52,7 +52,7 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
402
403 if( h->param.b_annexb )
404 {
405- if( b_long_startcode )
406+ if( nal->b_long_startcode )
407 *dst++ = 0x00;
408 *dst++ = 0x00;
409 *dst++ = 0x00;
410@@ -77,7 +77,8 @@ int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startc
411 orig_dst[3] = size>> 0;
412 }
413
414- return size+4;
415+ nal->i_payload = size+4;
416+ nal->p_payload = orig_dst;
417 }
418
419 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf )
420diff --git a/common/bitstream.h b/common/bitstream.h
421index 9ce5bd7..dd8118d 100644
422--- a/common/bitstream.h
423+++ b/common/bitstream.h
424@@ -68,7 +68,6 @@ typedef struct
425 uint8_t *(*nal_escape) ( uint8_t *dst, uint8_t *src, uint8_t *end );
426 } x264_bitstream_function_t;
427
428-int x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal, int b_long_startcode );
429 void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf );
430
431 /* A larger level table size theoretically could help a bit at extremely
432diff --git a/encoder/encoder.c b/encoder/encoder.c
433index f54fe85..fe97aef 100644
434--- a/encoder/encoder.c
435+++ b/encoder/encoder.c
436@@ -427,6 +427,8 @@ static int x264_validate_parameters( x264_t *h )
437 else
438 h->param.b_sliced_threads = 0;
439 h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;
440+ if( h->i_thread_frames > 1 )
441+ h->param.nalu_process = NULL;
442
443 if( h->param.b_interlaced )
444 {
445@@ -1253,8 +1255,9 @@ static void x264_nal_start( x264_t *h, int i_type, int i_ref_idc )
446 {
447 x264_nal_t *nal = &h->out.nal[h->out.i_nal];
448
449- nal->i_ref_idc = i_ref_idc;
450- nal->i_type = i_type;
451+ nal->i_ref_idc = i_ref_idc;
452+ nal->i_type = i_type;
453+ nal->b_long_startcode = 1;
454
455 nal->i_payload= 0;
456 nal->p_payload= &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8];
457@@ -1280,6 +1283,8 @@ static int x264_nal_end( x264_t *h )
458 {
459 x264_nal_t *nal = &h->out.nal[h->out.i_nal];
460 nal->i_payload = &h->out.p_bitstream[bs_pos( &h->out.bs ) / 8] - nal->p_payload;
461+ if( h->param.nalu_process )
462+ h->param.nalu_process( h, nal );
463 h->out.i_nal++;
464
465 return x264_nal_check_buffer( h );
466@@ -1289,6 +1294,13 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
467 {
468 int nal_size = 0, previous_nal_size = 0;
469
470+ if( h->param.nalu_process )
471+ {
472+ for( int i = start; i < h->out.i_nal; i++ )
473+ nal_size += h->out.nal[i].i_payload;
474+ return nal_size;
475+ }
476+
477 for( int i = 0; i < start; i++ )
478 previous_nal_size += h->out.nal[i].i_payload;
479
480@@ -1311,11 +1323,9 @@ static int x264_encoder_encapsulate_nals( x264_t *h, int start )
481
482 for( int i = start; i < h->out.i_nal; i++ )
483 {
484- int long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
485- int size = x264_nal_encode( h, nal_buffer, &h->out.nal[i], long_startcode );
486- h->out.nal[i].i_payload = size;
487- h->out.nal[i].p_payload = nal_buffer;
488- nal_buffer += size;
489+ h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS;
490+ x264_nal_encode( h, nal_buffer, &h->out.nal[i] );
491+ nal_buffer += h->out.nal[i].i_payload;
492 }
493
494 x264_emms();
495@@ -1805,6 +1815,7 @@ static int x264_slice_write( x264_t *h )
496
497 /* Slice */
498 x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
499+ h->out.nal[h->out.i_nal].i_first_mb = h->sh.i_first_mb;
500
501 /* Slice header */
502 x264_macroblock_thread_init( h );
503@@ -2020,6 +2031,7 @@ static int x264_slice_write( x264_t *h )
504 i_mb_x = 0;
505 }
506 }
507+ h->out.nal[h->out.i_nal].i_last_mb = h->sh.i_last_mb;
508
509 if( h->param.b_cabac )
510 {
511diff --git a/x264.h b/x264.h
512index 1138a8b..e1ae084 100644
513--- a/x264.h
514+++ b/x264.h
515@@ -35,13 +35,61 @@
516
517 #include <stdarg.h>
518
519-#define X264_BUILD 100
520+#define X264_BUILD 101
521
522 /* x264_t:
523 * opaque handler for encoder */
524 typedef struct x264_t x264_t;
525
526 /****************************************************************************
527+ * NAL structure and functions
528+ ****************************************************************************/
529+
530+enum nal_unit_type_e
531+{
532+ NAL_UNKNOWN = 0,
533+ NAL_SLICE = 1,
534+ NAL_SLICE_DPA = 2,
535+ NAL_SLICE_DPB = 3,
536+ NAL_SLICE_DPC = 4,
537+ NAL_SLICE_IDR = 5, /* ref_idc != 0 */
538+ NAL_SEI = 6, /* ref_idc == 0 */
539+ NAL_SPS = 7,
540+ NAL_PPS = 8,
541+ NAL_AUD = 9,
542+ NAL_FILLER = 12,
543+ /* ref_idc == 0 for 6,9,10,11,12 */
544+};
545+enum nal_priority_e
546+{
547+ NAL_PRIORITY_DISPOSABLE = 0,
548+ NAL_PRIORITY_LOW = 1,
549+ NAL_PRIORITY_HIGH = 2,
550+ NAL_PRIORITY_HIGHEST = 3,
551+};
552+
553+/* The data within the payload is already NAL-encapsulated; the ref_idc and type
554+ * are merely in the struct for easy access by the calling application.
555+ * All data returned in an x264_nal_t, including the data in p_payload, is no longer
556+ * valid after the next call to x264_encoder_encode. Thus it must be used or copied
557+ * before calling x264_encoder_encode or x264_encoder_headers again. */
558+typedef struct
559+{
560+ int i_ref_idc; /* nal_priority_e */
561+ int i_type; /* nal_unit_type_e */
562+ int b_long_startcode;
563+ int i_first_mb; /* If this NAL is a slice, the index of the first MB in the slice. */
564+ int i_last_mb; /* If this NAL is a slice, the index of the last MB in the slice. */
565+
566+ /* Size of payload in bytes. */
567+ int i_payload;
568+ /* If param->b_annexb is set, Annex-B bytestream with startcode.
569+ * Otherwise, startcode is replaced with a 4-byte size.
570+ * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
571+ uint8_t *p_payload;
572+} x264_nal_t;
573+
574+/****************************************************************************
575 * Encoder parameters
576 ****************************************************************************/
577 /* CPU flags
578@@ -377,8 +425,41 @@ typedef struct x264_param_t
579 * i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
580 * Not used when x264_encoder_reconfig is called directly. */
581 void (*param_free)( void* );
582+
583+ /* Optional low-level callback for low-latency encoding. Called for each output NAL unit
584+ * immediately after the NAL unit is finished encoding. This allows the calling application
585+ * to begin processing video data (e.g. by sending packets over a network) before the frame
586+ * is done encoding.
587+ *
588+ * This callback MUST do the following in order to work correctly:
589+ * 1) Have available an output buffer of at least size nal->i_payload*3/2 + 5 + 16.
590+ * 2) Call x264_nal_encode( h, dst, nal ), where dst is the output buffer.
591+ * After these steps, the content of nal is valid and can be used in the same way as if
592+ * the NAL unit were output by x264_encoder_encode.
593+ *
594+ * This does not need to be synchronous with the encoding process: the data pointed to
595+ * by nal (both before and after x264_nal_encode) will remain valid until the next
596+ * x264_encoder_encode call. The callback must be re-entrant.
597+ *
598+ * This callback does not work with frame-based threads; threads must be disabled
599+ * or sliced-threads enabled. This callback also does not work as one would expect
600+ * with HRD -- since the buffering period SEI cannot be calculated until the frame
601+ * is finished encoding, it will not be sent via this callback.
602+ *
603+ * Note also that the NALs are not necessarily returned in order when sliced threads is
604+ * enabled. Accordingly, the variable i_first_mb and i_last_mb are available in
605+ * x264_nal_t to help the calling application reorder the slices if necessary.
606+ *
607+ * When this callback is enabled, x264_encoder_encode does not return valid NALs;
608+ * the calling application is expected to acquire all output NALs through the callback.
609+ *
610+ * It is generally sensible to combine this callback with a use of slice-max-mbs or
611+ * slice-max-size. */
612+ void (*nalu_process) ( x264_t *h, x264_nal_t *nal );
613 } x264_param_t;
614
615+void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
616+
617 /****************************************************************************
618 * H.264 level restriction information
619 ****************************************************************************/
620@@ -586,51 +667,6 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
621 void x264_picture_clean( x264_picture_t *pic );
622
623 /****************************************************************************
624- * NAL structure and functions
625- ****************************************************************************/
626-
627-enum nal_unit_type_e
628-{
629- NAL_UNKNOWN = 0,
630- NAL_SLICE = 1,
631- NAL_SLICE_DPA = 2,
632- NAL_SLICE_DPB = 3,
633- NAL_SLICE_DPC = 4,
634- NAL_SLICE_IDR = 5, /* ref_idc != 0 */
635- NAL_SEI = 6, /* ref_idc == 0 */
636- NAL_SPS = 7,
637- NAL_PPS = 8,
638- NAL_AUD = 9,
639- NAL_FILLER = 12,
640- /* ref_idc == 0 for 6,9,10,11,12 */
641-};
642-enum nal_priority_e
643-{
644- NAL_PRIORITY_DISPOSABLE = 0,
645- NAL_PRIORITY_LOW = 1,
646- NAL_PRIORITY_HIGH = 2,
647- NAL_PRIORITY_HIGHEST = 3,
648-};
649-
650-/* The data within the payload is already NAL-encapsulated; the ref_idc and type
651- * are merely in the struct for easy access by the calling application.
652- * All data returned in an x264_nal_t, including the data in p_payload, is no longer
653- * valid after the next call to x264_encoder_encode. Thus it must be used or copied
654- * before calling x264_encoder_encode or x264_encoder_headers again. */
655-typedef struct
656-{
657- int i_ref_idc; /* nal_priority_e */
658- int i_type; /* nal_unit_type_e */
659-
660- /* Size of payload in bytes. */
661- int i_payload;
662- /* If param->b_annexb is set, Annex-B bytestream with 4-byte startcode.
663- * Otherwise, startcode is replaced with a 4-byte size.
664- * This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
665- uint8_t *p_payload;
666-} x264_nal_t;
667-
668-/****************************************************************************
669 * Encoder functions
670 ****************************************************************************/
671
672--
6731.7.1
674
675
676From 22bf1672adafa4e938a13952b8f71cd7548d31f1 Mon Sep 17 00:00:00 2001
677From: Lamont Alston <wewk584@gmail.com>
678Date: Tue, 29 Jun 2010 10:11:42 -0700
679Subject: [PATCH 3/7] Make open-GOP Blu-ray compatible
680 Blu-ray is even more braindamaged than we thought.
681 Accordingly, open-gop options are now "normal" and "bluray", as opposed to display and coded.
682 Normal should be used in all cases besides Blu-ray authoring.
683
684---
685 encoder/encoder.c | 2 +-
686 encoder/slicetype.c | 28 +++++++---------------------
687 x264.c | 8 ++++----
688 x264.h | 8 ++++----
689 4 files changed, 16 insertions(+), 30 deletions(-)
690
691diff --git a/encoder/encoder.c b/encoder/encoder.c
692index fe97aef..5cd3307 100644
693--- a/encoder/encoder.c
694+++ b/encoder/encoder.c
695@@ -577,7 +577,7 @@ static int x264_validate_parameters( x264_t *h )
696 h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
697 }
698 h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
699- h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER );
700+ h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
701 if( h->param.i_keyint_max == 1 )
702 h->param.b_intra_refresh = 0;
703 h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
704diff --git a/encoder/slicetype.c b/encoder/slicetype.c
705index 2703f02..4ede8cf 100644
706--- a/encoder/slicetype.c
707+++ b/encoder/slicetype.c
708@@ -1233,17 +1233,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
709 if( !h->param.b_intra_refresh )
710 for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
711 {
712- int j = i;
713- if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
714- {
715- while( IS_X264_TYPE_B( frames[i]->i_type ) )
716- i++;
717- while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
718- j--;
719- }
720 frames[i]->i_type = X264_TYPE_I;
721 reset_start = X264_MIN( reset_start, i+1 );
722- i = j;
723+ if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
724+ while( IS_X264_TYPE_B( frames[i-1]->i_type ) )
725+ i--;
726 }
727
728 if( vbv_lookahead )
729@@ -1337,16 +1331,8 @@ void x264_slicetype_decide( x264_t *h )
730 if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
731 frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
732 int warn = frm->i_type != X264_TYPE_IDR;
733- if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER )
734- warn &= frm->i_type != X264_TYPE_I && frm->i_type != X264_TYPE_KEYFRAME;
735- if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
736- {
737- /* if this minigop ends with i, it's not a violation */
738- int j = bframes;
739- while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
740- j++;
741- warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
742- }
743+ if( warn && h->param.i_open_gop )
744+ warn &= frm->i_type != X264_TYPE_I;
745 if( warn )
746 x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
747 }
748@@ -1355,8 +1341,8 @@ void x264_slicetype_decide( x264_t *h )
749 if( h->param.i_open_gop )
750 {
751 h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
752- if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
753- h->lookahead->i_last_keyframe -= bframes; // Use coded order
754+ if( h->param.i_open_gop == X264_OPEN_GOP_BLURAY )
755+ h->lookahead->i_last_keyframe -= bframes; // Use bluray order
756 frm->b_keyframe = 1;
757 }
758 else
759diff --git a/x264.c b/x264.c
760index df04385..f08ab41 100644
761--- a/x264.c
762+++ b/x264.c
763@@ -382,10 +382,10 @@ static void Help( x264_param_t *defaults, int longhelp )
764 " - normal: Non-strict (not Blu-ray compatible)\n",
765 strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
766 H1( " --open-gop <string> Use recovery points to close GOPs [none]\n"
767- " - none: Use standard closed GOPs\n"
768- " - display: Base GOP length on display order\n"
769- " (not Blu-ray compatible)\n"
770- " - coded: Base GOP length on coded order\n"
771+ " - none: closed GOPs only\n"
772+ " - normal: standard open GOPs\n"
773+ " (not Blu-ray compatible)\n"
774+ " - bluray: Blu-ray-compatible open GOPs\n"
775 " Only available with b-frames\n" );
776 H1( " --no-cabac Disable CABAC\n" );
777 H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
778diff --git a/x264.h b/x264.h
779index e1ae084..86f7426 100644
780--- a/x264.h
781+++ b/x264.h
782@@ -153,8 +153,8 @@ typedef struct
783 #define X264_B_PYRAMID_NORMAL 2
784 #define X264_KEYINT_MIN_AUTO 0
785 #define X264_OPEN_GOP_NONE 0
786-#define X264_OPEN_GOP_DISPLAY_ORDER 1
787-#define X264_OPEN_GOP_CODED_ORDER 2
788+#define X264_OPEN_GOP_NORMAL 1
789+#define X264_OPEN_GOP_BLURAY 2
790
791 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
792 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
793@@ -166,7 +166,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "
794 static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
795 static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
796 static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
797-static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 };
798+static const char * const x264_open_gop_names[] = { "none", "normal", "bluray", 0 };
799
800 /* Colorspace type
801 * legacy only; nothing other than I420 is really supported. */
802@@ -276,7 +276,7 @@ typedef struct x264_param_t
803 int i_bframe_adaptive;
804 int i_bframe_bias;
805 int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
806- int i_open_gop; /* Open gop: 1=display order, 2=coded order to determine gop size */
807+ int i_open_gop; /* Open gop: 1=normal (GOP length in display order), 2=bluray compatibility braindamage mode */
808
809 int b_deblocking_filter;
810 int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
811--
8121.7.1
813
814
815From ce052b8bae0bed131716d3ed655b974a1a2ebcb5 Mon Sep 17 00:00:00 2001
816From: Steven Walters <kemuri9@gmail.com>
817Date: Sat, 26 Jun 2010 16:28:49 -0400
818Subject: [PATCH 4/7] Centralize logging within x264cli
819 x264cli messages will now respect the log level they pertain to.
820 Slightly reduces binary size.
821
822---
823 input/avs.c | 88 +++++-------------
824 input/ffms.c | 58 +++--------
825 input/input.h | 2 +
826 input/lavf.c | 55 +++--------
827 input/thread.c | 9 +-
828 input/timecode.c | 111 ++++++----------------
829 input/y4m.c | 23 +----
830 input/yuv.c | 8 +-
831 muxers.h | 61 ------------
832 output/flv.c | 10 +-
833 output/flv_bytestream.c | 2 +-
834 output/matroska.c | 2 +-
835 output/matroska_ebml.c | 2 +-
836 output/mp4.c | 12 +--
837 output/output.h | 2 +
838 output/raw.c | 2 +-
839 x264.c | 246 +++++++++++++++++++++--------------------------
840 x264cli.h | 67 +++++++++++++
841 18 files changed, 289 insertions(+), 471 deletions(-)
842 delete mode 100644 muxers.h
843 create mode 100644 x264cli.h
844
845diff --git a/input/avs.c b/input/avs.c
846index 07add40..b83f715 100644
847--- a/input/avs.c
848+++ b/input/avs.c
849@@ -20,8 +20,9 @@
850 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
851 *****************************************************************************/
852
853-#include "muxers.h"
854+#include "input.h"
855 #include <windows.h>
856+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "avs", __VA_ARGS__ )
857
858 /* the AVS interface currently uses __declspec to link function declarations to their definitions in the dll.
859 this has a side effect of preventing program execution if the avisynth dll is not found,
860@@ -131,27 +132,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
861 FILE *fh = fopen( psz_filename, "r" );
862 if( !fh )
863 return -1;
864- else if( !x264_is_regular_file( fh ) )
865- {
866- fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
867- return -1;
868- }
869+ FAIL_IF_ERROR( !x264_is_regular_file( fh ), "AVS input is incompatible with non-regular file `%s'\n", psz_filename );
870 fclose( fh );
871
872 avs_hnd_t *h = malloc( sizeof(avs_hnd_t) );
873 if( !h )
874 return -1;
875- if( avs_load_library( h ) )
876- {
877- fprintf( stderr, "avs [error]: failed to load avisynth\n" );
878- return -1;
879- }
880+ FAIL_IF_ERROR( avs_load_library( h ), "failed to load avisynth\n" )
881 h->env = h->func.avs_create_script_environment( AVS_INTERFACE_YV12 );
882- if( !h->env )
883- {
884- fprintf( stderr, "avs [error]: failed to initiate avisynth\n" );
885- return -1;
886- }
887+ FAIL_IF_ERROR( !h->env, "failed to initiate avisynth\n" )
888 AVS_Value arg = avs_new_value_string( psz_filename );
889 AVS_Value res;
890 char *filename_ext = get_filename_extension( psz_filename );
891@@ -159,11 +148,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
892 if( !strcasecmp( filename_ext, "avs" ) )
893 {
894 res = h->func.avs_invoke( h->env, "Import", arg, NULL );
895- if( avs_is_error( res ) )
896- {
897- fprintf( stderr, "avs [error]: %s\n", avs_as_string( res ) );
898- return -1;
899- }
900+ FAIL_IF_ERROR( avs_is_error( res ), "%s\n", avs_as_string( res ) )
901 /* check if the user is using a multi-threaded script and apply distributor if necessary.
902 adapted from avisynth's vfw interface */
903 AVS_Value mt_test = h->func.avs_invoke( h->env, "GetMTMode", avs_new_value_bool( 0 ), NULL );
904@@ -184,78 +169,55 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
905 int i;
906 for( i = 0; filter[i]; i++ )
907 {
908- fprintf( stderr, "avs [info]: trying %s... ", filter[i] );
909+ x264_cli_log( "avs", X264_LOG_INFO, "trying %s... ", filter[i] );
910 if( !h->func.avs_function_exists( h->env, filter[i] ) )
911 {
912- fprintf( stderr, "not found\n" );
913+ x264_cli_printf( X264_LOG_INFO, "not found\n" );
914 continue;
915 }
916 if( !strncasecmp( filter[i], "FFmpegSource", 12 ) )
917 {
918- fprintf( stderr, "indexing... " );
919+ x264_cli_printf( X264_LOG_INFO, "indexing... " );
920 fflush( stderr );
921 }
922 res = h->func.avs_invoke( h->env, filter[i], arg, NULL );
923 if( !avs_is_error( res ) )
924 {
925- fprintf( stderr, "succeeded\n" );
926+ x264_cli_printf( X264_LOG_INFO, "succeeded\n" );
927 break;
928 }
929- fprintf( stderr, "failed\n" );
930- }
931- if( !filter[i] )
932- {
933- fprintf( stderr, "avs [error]: unable to find source filter to open `%s'\n", psz_filename );
934- return -1;
935+ x264_cli_printf( X264_LOG_INFO, "failed\n" );
936 }
937+ FAIL_IF_ERROR( !filter[i], "unable to find source filter to open `%s'\n", psz_filename )
938 }
939- if( !avs_is_clip( res ) )
940- {
941- fprintf( stderr, "avs [error]: `%s' didn't return a video clip\n", psz_filename );
942- return -1;
943- }
944+ FAIL_IF_ERROR( !avs_is_clip( res ), "`%s' didn't return a video clip\n", psz_filename )
945 h->clip = h->func.avs_take_clip( res, h->env );
946 int avs_version = h->func.avs_get_version( h->clip );
947 const AVS_VideoInfo *vi = h->func.avs_get_video_info( h->clip );
948- if( !avs_has_video( vi ) )
949- {
950- fprintf( stderr, "avs [error]: `%s' has no video data\n", psz_filename );
951- return -1;
952- }
953+ FAIL_IF_ERROR( !avs_has_video( vi ), "`%s' has no video data\n", psz_filename )
954 /* if the clip is made of fields instead of frames, call weave to make them frames */
955 if( avs_is_field_based( vi ) )
956 {
957- fprintf( stderr, "avs [warning]: detected fieldbased (separated) input, weaving to frames\n" );
958+ x264_cli_log( "avs", X264_LOG_WARNING, "detected fieldbased (separated) input, weaving to frames\n" );
959 AVS_Value tmp = h->func.avs_invoke( h->env, "Weave", res, NULL );
960- if( avs_is_error( tmp ) )
961- {
962- fprintf( stderr, "avs [error]: couldn't weave fields into frames\n" );
963- return -1;
964- }
965+ FAIL_IF_ERROR( avs_is_error( tmp ), "couldn't weave fields into frames\n" )
966 res = update_clip( h, &vi, tmp, res );
967 info->interlaced = 1;
968 info->tff = avs_is_tff( vi );
969 }
970- if( vi->width&1 || vi->height&1 )
971- {
972- fprintf( stderr, "avs [error]: input clip width or height not divisible by 2 (%dx%d)\n",
973- vi->width, vi->height );
974- return -1;
975- }
976+ FAIL_IF_ERROR( vi->width&1 || vi->height&1, "input clip width or height not divisible by 2 (%dx%d)\n", vi->width, vi->height )
977 /* always call ConvertToYV12 to convert non YV12 planar colorspaces to YV12 when user's AVS supports them,
978 as all planar colorspaces are flagged as YV12. If it is already YV12 in this case, the call does nothing */
979 if( !avs_is_yv12( vi ) || avs_version >= AVS_INTERFACE_OTHER_PLANAR )
980 {
981- fprintf( stderr, "avs %s\n", !avs_is_yv12( vi ) ? "[warning]: converting input clip to YV12"
982- : "[info]: avisynth 2.6+ detected, forcing conversion to YV12" );
983+ if( !avs_is_yv12( vi ) )
984+ x264_cli_log( "avs", X264_LOG_WARNING, "converting input clip to YV12\n" );
985+ else
986+ x264_cli_log( "avs", X264_LOG_INFO, "avisynth 2.6+ detected, forcing conversion to YV12\n" );
987 const char *arg_name[2] = { NULL, "interlaced" };
988 AVS_Value arg_arr[2] = { res, avs_new_value_bool( info->interlaced ) };
989 AVS_Value res2 = h->func.avs_invoke( h->env, "ConvertToYV12", avs_new_value_array( arg_arr, 2 ), arg_name );
990- if( avs_is_error( res2 ) )
991- {
992- fprintf( stderr, "avs [error]: couldn't convert input clip to YV12\n" );
993- return -1;
994- }
995+ FAIL_IF_ERROR( avs_is_error( res2 ), "couldn't convert input clip to YV12\n" )
996 res = update_clip( h, &vi, res2, res );
997 }
998 h->func.avs_release_value( res );
999@@ -294,11 +256,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1000 return -1;
1001 AVS_VideoFrame *frm = p_pic->opaque = h->func.avs_get_frame( h->clip, i_frame );
1002 const char *err = h->func.avs_clip_get_error( h->clip );
1003- if( err )
1004- {
1005- fprintf( stderr, "avs [error]: %s occurred while reading frame %d\n", err, i_frame );
1006- return -1;
1007- }
1008+ FAIL_IF_ERROR( err, "%s occurred while reading frame %d\n", err, i_frame )
1009 for( int i = 0; i < 3; i++ )
1010 {
1011 /* explicitly cast away the const attribute to avoid a warning */
1012diff --git a/input/ffms.c b/input/ffms.c
1013index b2a253e..fe8bf7e 100644
1014--- a/input/ffms.c
1015+++ b/input/ffms.c
1016@@ -21,8 +21,10 @@
1017 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1018 *****************************************************************************/
1019
1020-#include "muxers.h"
1021+#include "input.h"
1022 #include <ffms.h>
1023+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "ffms", __VA_ARGS__ )
1024+
1025 #undef DECLARE_ALIGNED
1026 #include <libavcodec/avcodec.h>
1027 #include <libswscale/swscale.h>
1028@@ -86,28 +88,16 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1029 {
1030 idx = FFMS_MakeIndex( psz_filename, 0, 0, NULL, NULL, 0, update_progress, NULL, &e );
1031 fprintf( stderr, " \r" );
1032- if( !idx )
1033- {
1034- fprintf( stderr, "ffms [error]: could not create index\n" );
1035- return -1;
1036- }
1037+ FAIL_IF_ERROR( !idx, "could not create index\n" )
1038 if( opt->index_file && FFMS_WriteIndex( opt->index_file, idx, &e ) )
1039- fprintf( stderr, "ffms [warning]: could not write index file\n" );
1040+ x264_cli_log( "ffms", X264_LOG_WARNING, "could not write index file\n" );
1041 }
1042
1043 int trackno = FFMS_GetFirstTrackOfType( idx, FFMS_TYPE_VIDEO, &e );
1044- if( trackno < 0 )
1045- {
1046- fprintf( stderr, "ffms [error]: could not find video track\n" );
1047- return -1;
1048- }
1049+ FAIL_IF_ERROR( trackno < 0, "could not find video track\n" )
1050
1051 h->video_source = FFMS_CreateVideoSource( psz_filename, trackno, idx, 1, seekmode, &e );
1052- if( !h->video_source )
1053- {
1054- fprintf( stderr, "ffms [error]: could not create video source\n" );
1055- return -1;
1056- }
1057+ FAIL_IF_ERROR( !h->video_source, "could not create video source\n" )
1058
1059 h->track = FFMS_GetTrackFromVideo( h->video_source );
1060
1061@@ -121,11 +111,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1062 h->vfr_input = info->vfr;
1063
1064 const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, 0, &e );
1065- if( !frame )
1066- {
1067- fprintf( stderr, "ffms [error]: could not read frame 0\n" );
1068- return -1;
1069- }
1070+ FAIL_IF_ERROR( !frame, "could not read frame 0\n" )
1071
1072 h->init_width = h->cur_width = info->width = frame->EncodedWidth;
1073 h->init_height = h->cur_height = info->height = frame->EncodedHeight;
1074@@ -134,8 +120,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1075 info->tff = frame->TopFieldFirst;
1076
1077 if( h->cur_pix_fmt != PIX_FMT_YUV420P )
1078- fprintf( stderr, "ffms [warning]: converting from %s to YV12\n",
1079- avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1080+ x264_cli_log( "ffms", X264_LOG_WARNING, "converting from %s to YV12\n",
1081+ avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1082
1083 /* ffms timestamps are in milliseconds. ffms also uses int64_ts for timebase,
1084 * so we need to reduce large timebases to prevent overflow */
1085@@ -173,19 +159,15 @@ static int check_swscale( ffms_hnd_t *h, const FFMS_Frame *frame, int i_frame )
1086 if( h->scaler )
1087 {
1088 sws_freeContext( h->scaler );
1089- fprintf( stderr, "ffms [warning]: stream properties changed to %dx%d, %s at frame %d \n", frame->EncodedWidth,
1090- frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
1091+ x264_cli_log( "ffms", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d \n", frame->EncodedWidth,
1092+ frame->EncodedHeight, avcodec_get_pix_fmt_name( frame->EncodedPixelFormat ), i_frame );
1093 h->cur_width = frame->EncodedWidth;
1094 h->cur_height = frame->EncodedHeight;
1095 h->cur_pix_fmt = frame->EncodedPixelFormat;
1096 }
1097 h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
1098 PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
1099- if( !h->scaler )
1100- {
1101- fprintf( stderr, "ffms [error]: could not open swscale context\n" );
1102- return -1;
1103- }
1104+ FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
1105 return 0;
1106 }
1107
1108@@ -195,11 +177,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1109 FFMS_ErrorInfo e;
1110 e.BufferSize = 0;
1111 const FFMS_Frame *frame = FFMS_GetFrame( h->video_source, i_frame, &e );
1112- if( !frame )
1113- {
1114- fprintf( stderr, "ffms [error]: could not read frame %d\n", i_frame );
1115- return -1;
1116- }
1117+ FAIL_IF_ERROR( !frame, "could not read frame %d\n", i_frame )
1118
1119 if( check_swscale( h, frame, i_frame ) )
1120 return -1;
1121@@ -214,12 +192,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1122
1123 if( h->vfr_input )
1124 {
1125- if( info->PTS == AV_NOPTS_VALUE )
1126- {
1127- fprintf( stderr, "ffms [error]: invalid timestamp. "
1128- "Use --force-cfr and specify a framerate with --fps\n" );
1129- return -1;
1130- }
1131+ FAIL_IF_ERROR( info->PTS == AV_NOPTS_VALUE, "invalid timestamp. "
1132+ "Use --force-cfr and specify a framerate with --fps\n" )
1133
1134 if( !h->pts_offset_flag )
1135 {
1136diff --git a/input/input.h b/input/input.h
1137index f89b13b..f588f3c 100644
1138--- a/input/input.h
1139+++ b/input/input.h
1140@@ -25,6 +25,8 @@
1141 #ifndef X264_INPUT_H
1142 #define X264_INPUT_H
1143
1144+#include "x264cli.h"
1145+
1146 /* options that are used by only some demuxers */
1147 typedef struct
1148 {
1149diff --git a/input/lavf.c b/input/lavf.c
1150index 4b0375f..54a275f 100644
1151--- a/input/lavf.c
1152+++ b/input/lavf.c
1153@@ -21,7 +21,8 @@
1154 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1155 *****************************************************************************/
1156
1157-#include "muxers.h"
1158+#include "input.h"
1159+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "lavf", __VA_ARGS__ )
1160 #undef DECLARE_ALIGNED
1161 #include <libavformat/avformat.h>
1162 #include <libswscale/swscale.h>
1163@@ -59,19 +60,15 @@ static int check_swscale( lavf_hnd_t *h, AVCodecContext *c, int i_frame )
1164 if( h->scaler )
1165 {
1166 sws_freeContext( h->scaler );
1167- fprintf( stderr, "lavf [warning]: stream properties changed to %dx%d, %s at frame %d \n",
1168- c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
1169+ x264_cli_log( "lavf", X264_LOG_WARNING, "stream properties changed to %dx%d, %s at frame %d \n",
1170+ c->width, c->height, avcodec_get_pix_fmt_name( c->pix_fmt ), i_frame );
1171 h->cur_width = c->width;
1172 h->cur_height = c->height;
1173 h->cur_pix_fmt = c->pix_fmt;
1174 }
1175 h->scaler = sws_getContext( h->cur_width, h->cur_height, h->cur_pix_fmt, h->init_width, h->init_height,
1176 PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL );
1177- if( !h->scaler )
1178- {
1179- fprintf( stderr, "lavf [error]: could not open swscale context\n" );
1180- return -1;
1181- }
1182+ FAIL_IF_ERROR( !h->scaler, "could not open swscale context\n" )
1183 return 0;
1184 }
1185
1186@@ -106,12 +103,12 @@ static int read_frame_internal( x264_picture_t *p_pic, lavf_hnd_t *h, int i_fram
1187 {
1188 c->reordered_opaque = pkt->pts;
1189 if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
1190- fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
1191+ x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
1192 }
1193 if( !finished )
1194 {
1195 if( avcodec_decode_video2( c, frame, &finished, pkt ) < 0 )
1196- fprintf( stderr, "lavf [warning]: video decoding failed on frame %d\n", h->next_frame );
1197+ x264_cli_log( "lavf", X264_LOG_WARNING, "video decoding failed on frame %d\n", h->next_frame );
1198 if( !finished )
1199 return -1;
1200 }
1201@@ -166,26 +163,13 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1202 if( !strcmp( psz_filename, "-" ) )
1203 psz_filename = "pipe:";
1204
1205- if( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ) )
1206- {
1207- fprintf( stderr, "lavf [error]: could not open input file\n" );
1208- return -1;
1209- }
1210-
1211- if( av_find_stream_info( h->lavf ) < 0 )
1212- {
1213- fprintf( stderr, "lavf [error]: could not find input stream info\n" );
1214- return -1;
1215- }
1216+ FAIL_IF_ERROR( av_open_input_file( &h->lavf, psz_filename, NULL, 0, NULL ), "could not open input file\n" )
1217+ FAIL_IF_ERROR( av_find_stream_info( h->lavf ) < 0, "could not find input stream info\n" )
1218
1219 int i = 0;
1220 while( i < h->lavf->nb_streams && h->lavf->streams[i]->codec->codec_type != CODEC_TYPE_VIDEO )
1221 i++;
1222- if( i == h->lavf->nb_streams )
1223- {
1224- fprintf( stderr, "lavf [error]: could not find video stream\n" );
1225- return -1;
1226- }
1227+ FAIL_IF_ERROR( i == h->lavf->nb_streams, "could not find video stream\n" )
1228 h->stream_id = i;
1229 h->next_frame = 0;
1230 h->pts_offset_flag = 0;
1231@@ -207,22 +191,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1232 info->csp |= X264_CSP_VFLIP;
1233
1234 if( h->cur_pix_fmt != PIX_FMT_YUV420P )
1235- fprintf( stderr, "lavf [warning]: converting from %s to YV12\n",
1236- avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1237-
1238- if( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ) )
1239- {
1240- fprintf( stderr, "lavf [error]: could not find decoder for video stream\n" );
1241- return -1;
1242- }
1243+ x264_cli_log( "lavf", X264_LOG_WARNING, "converting from %s to YV12\n",
1244+ avcodec_get_pix_fmt_name( h->cur_pix_fmt ) );
1245+ FAIL_IF_ERROR( avcodec_open( c, avcodec_find_decoder( c->codec_id ) ),
1246+ "could not find decoder for video stream\n" )
1247
1248 /* prefetch the first frame and set/confirm flags */
1249 h->first_pic = malloc( sizeof(x264_picture_t) );
1250- if( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ) )
1251- {
1252- fprintf( stderr, "lavf [error]: malloc failed\n" );
1253- return -1;
1254- }
1255+ FAIL_IF_ERROR( !h->first_pic || lavf_input.picture_alloc( h->first_pic, info->csp, info->width, info->height ),
1256+ "malloc failed\n" )
1257 else if( read_frame_internal( h->first_pic, h, 0, info ) )
1258 return -1;
1259
1260diff --git a/input/thread.c b/input/thread.c
1261index c4b07fa..98af22b 100644
1262--- a/input/thread.c
1263+++ b/input/thread.c
1264@@ -21,7 +21,7 @@
1265 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1266 *****************************************************************************/
1267
1268-#include "muxers.h"
1269+#include "input.h"
1270
1271 extern cli_input_t input;
1272
1273@@ -47,11 +47,8 @@ typedef struct thread_input_arg_t
1274 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
1275 {
1276 thread_hnd_t *h = malloc( sizeof(thread_hnd_t) );
1277- if( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ) )
1278- {
1279- fprintf( stderr, "x264 [error]: malloc failed\n" );
1280- return -1;
1281- }
1282+ FAIL_IF_ERR( !h || input.picture_alloc( &h->pic, info->csp, info->width, info->height ),
1283+ "x264", "malloc failed\n" )
1284 h->input = input;
1285 h->p_handle = *p_handle;
1286 h->next_frame = -1;
1287diff --git a/input/timecode.c b/input/timecode.c
1288index a307327..7821e76 100644
1289--- a/input/timecode.c
1290+++ b/input/timecode.c
1291@@ -20,7 +20,8 @@
1292 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1293 *****************************************************************************/
1294
1295-#include "muxers.h"
1296+#include "input.h"
1297+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "timecode", __VA_ARGS__ )
1298 #include <math.h>
1299
1300 extern cli_input_t input;
1301@@ -61,12 +62,8 @@ static double correct_fps( double fps, timecode_hnd_t *h )
1302 {
1303 fps_den = i * h->timebase_num;
1304 fps_num = round( fps_den * fps_sig ) * exponent;
1305- if( fps_num > UINT32_MAX )
1306- {
1307- fprintf( stderr, "timecode [error]: tcfile fps correction failed.\n"
1308- " Specify an appropriate timebase manually or remake tcfile.\n" );
1309- return -1;
1310- }
1311+ FAIL_IF_ERROR( fps_num > UINT32_MAX, "tcfile fps correction failed.\n"
1312+ " Specify an appropriate timebase manually or remake tcfile.\n" )
1313 if( fabs( ((double)fps_num / fps_den) / exponent - fps_sig ) < DOUBLE_EPSILON )
1314 break;
1315 ++i;
1316@@ -91,12 +88,8 @@ static int try_mkv_timebase_den( double *fpss, timecode_hnd_t *h, int loop_num )
1317 double fps_sig = sigexp10( fpss[num], &exponent );
1318 fps_den = round( MKV_TIMEBASE_DEN / fps_sig ) / exponent;
1319 h->timebase_num = fps_den && h->timebase_num ? gcd( h->timebase_num, fps_den ) : fps_den;
1320- if( h->timebase_num > UINT32_MAX || !h->timebase_num )
1321- {
1322- fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
1323- " Specify timebase manually.\n" );
1324- return -1;
1325- }
1326+ FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || !h->timebase_num, "automatic timebase generation failed.\n"
1327+ " Specify timebase manually.\n" )
1328 }
1329 return 0;
1330 }
1331@@ -110,11 +103,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1332 double *fpss = NULL;
1333
1334 ret = fscanf( tcfile_in, "# timecode format v%d", &tcfv );
1335- if( ret != 1 || (tcfv != 1 && tcfv != 2) )
1336- {
1337- fprintf( stderr, "timecode [error]: unsupported timecode format\n" );
1338- return -1;
1339- }
1340+ FAIL_IF_ERROR( ret != 1 || (tcfv != 1 && tcfv != 2), "unsupported timecode format\n" )
1341
1342 if( tcfv == 1 )
1343 {
1344@@ -128,18 +117,11 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1345 {
1346 if( buff[0] == '#' || buff[0] == '\n' || buff[0] == '\r' )
1347 continue;
1348- if( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1 )
1349- {
1350- fprintf( stderr, "timecode [error]: tcfile parsing error: assumed fps not found\n" );
1351- return -1;
1352- }
1353+ FAIL_IF_ERROR( sscanf( buff, "assume %lf", &h->assume_fps ) != 1 && sscanf( buff, "Assume %lf", &h->assume_fps ) != 1,
1354+ "tcfile parsing error: assumed fps not found\n" )
1355 break;
1356 }
1357- if( h->assume_fps <= 0 )
1358- {
1359- fprintf( stderr, "timecode [error]: invalid assumed fps %.6f\n", h->assume_fps );
1360- return -1;
1361- }
1362+ FAIL_IF_ERROR( h->assume_fps <= 0, "invalid assumed fps %.6f\n", h->assume_fps )
1363
1364 file_pos = ftell( tcfile_in );
1365 h->stored_pts_num = 0;
1366@@ -152,16 +134,9 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1367 continue;
1368 }
1369 ret = sscanf( buff, "%d,%d,%lf", &start, &end, &seq_fps );
1370- if( ret != 3 && ret != EOF )
1371- {
1372- fprintf( stderr, "timecode [error]: invalid input tcfile\n" );
1373- return -1;
1374- }
1375- if( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0 )
1376- {
1377- fprintf( stderr, "timecode [error]: invalid input tcfile at line %d: %s\n", num, buff );
1378- return -1;
1379- }
1380+ FAIL_IF_ERROR( ret != 3 && ret != EOF, "invalid input tcfile\n" )
1381+ FAIL_IF_ERROR( start > end || start <= prev_start || end <= prev_end || seq_fps <= 0,
1382+ "invalid input tcfile at line %d: %s\n", num, buff )
1383 prev_start = start;
1384 prev_end = end;
1385 if( h->auto_timebase_den || h->auto_timebase_num )
1386@@ -259,11 +234,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1387 ++num;
1388 }
1389 timecodes_num = h->stored_pts_num + h->seek;
1390- if( !timecodes_num )
1391- {
1392- fprintf( stderr, "timecode [error]: input tcfile doesn't have any timecodes!\n" );
1393- return -1;
1394- }
1395+ FAIL_IF_ERROR( !timecodes_num, "input tcfile doesn't have any timecodes!\n" )
1396 fseek( tcfile_in, file_pos, SEEK_SET );
1397
1398 timecodes = malloc( timecodes_num * sizeof(double) );
1399@@ -272,11 +243,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1400
1401 fgets( buff, sizeof(buff), tcfile_in );
1402 ret = sscanf( buff, "%lf", &timecodes[0] );
1403- if( ret != 1 )
1404- {
1405- fprintf( stderr, "timecode [error]: invalid input tcfile for frame 0\n" );
1406- goto fail;
1407- }
1408+ FAIL_IF_ERROR( ret != 1, "invalid input tcfile for frame 0\n" )
1409 for( num = 1; num < timecodes_num; )
1410 {
1411 fgets( buff, sizeof(buff), tcfile_in );
1412@@ -284,11 +251,8 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1413 continue;
1414 ret = sscanf( buff, "%lf", &timecodes[num] );
1415 timecodes[num] *= 1e-3; /* Timecode format v2 is expressed in milliseconds. */
1416- if( ret != 1 || timecodes[num] <= timecodes[num - 1] )
1417- {
1418- fprintf( stderr, "timecode [error]: invalid input tcfile for frame %d\n", num );
1419- goto fail;
1420- }
1421+ FAIL_IF_ERROR( ret != 1 || timecodes[num] <= timecodes[num - 1],
1422+ "invalid input tcfile for frame %d\n", num )
1423 ++num;
1424 }
1425
1426@@ -342,14 +306,10 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1427 uint64_t i = gcd( h->timebase_num, h->timebase_den );
1428 h->timebase_num /= i;
1429 h->timebase_den /= i;
1430- fprintf( stderr, "timecode [info]: automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
1431- }
1432- else if( h->timebase_den > UINT32_MAX || !h->timebase_den )
1433- {
1434- fprintf( stderr, "timecode [error]: automatic timebase generation failed.\n"
1435- " Specify an appropriate timebase manually.\n" );
1436- goto fail;
1437+ x264_cli_log( "timecode", X264_LOG_INFO, "automatic timebase generation %"PRIu64"/%"PRIu64"\n", h->timebase_num, h->timebase_den );
1438 }
1439+ else FAIL_IF_ERROR( h->timebase_den > UINT32_MAX || !h->timebase_den, "automatic timebase generation failed.\n"
1440+ " Specify an appropriate timebase manually.\n" )
1441
1442 h->pts = malloc( h->stored_pts_num * sizeof(int64_t) );
1443 if( !h->pts )
1444@@ -360,11 +320,7 @@ static int parse_tcfile( FILE *tcfile_in, timecode_hnd_t *h, video_info_t *info
1445 {
1446 h->pts[num] = (int64_t)( timecodes[h->seek + num] * ((double)h->timebase_den / h->timebase_num) + 0.5 );
1447 h->pts[num] -= pts_seek_offset;
1448- if( h->pts[num] <= h->pts[num - 1] )
1449- {
1450- fprintf( stderr, "timecode [error]: invalid timebase or timecode for frame %d\n", num );
1451- goto fail;
1452- }
1453+ FAIL_IF_ERROR( h->pts[num] <= h->pts[num - 1], "invalid timebase or timecode for frame %d\n", num )
1454 }
1455
1456 free( timecodes );
1457@@ -386,11 +342,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1458 int ret = 0;
1459 FILE *tcfile_in;
1460 timecode_hnd_t *h = malloc( sizeof(timecode_hnd_t) );
1461- if( !h )
1462- {
1463- fprintf( stderr, "timecode [error]: malloc failed\n" );
1464- return -1;
1465- }
1466+ FAIL_IF_ERROR( !h, "malloc failed\n" )
1467 h->input = input;
1468 h->p_handle = *p_handle;
1469 h->frame_total = input.get_frame_total( h->p_handle );
1470@@ -400,11 +352,8 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1471 ret = sscanf( opt->timebase, "%"SCNu64"/%"SCNu64, &h->timebase_num, &h->timebase_den );
1472 if( ret == 1 )
1473 h->timebase_num = strtoul( opt->timebase, NULL, 10 );
1474- if( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX )
1475- {
1476- fprintf( stderr, "timecode [error]: timebase you specified exceeds H.264 maximum\n" );
1477- return -1;
1478- }
1479+ FAIL_IF_ERROR( h->timebase_num > UINT32_MAX || h->timebase_den > UINT32_MAX,
1480+ "timebase you specified exceeds H.264 maximum\n" )
1481 }
1482 h->auto_timebase_num = !ret;
1483 h->auto_timebase_den = ret < 2;
1484@@ -418,14 +367,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1485 *p_handle = h;
1486
1487 tcfile_in = fopen( psz_filename, "rb" );
1488- if( !tcfile_in )
1489- {
1490- fprintf( stderr, "timecode [error]: can't open `%s'\n", psz_filename );
1491- return -1;
1492- }
1493+ FAIL_IF_ERROR( !tcfile_in, "can't open `%s'\n", psz_filename )
1494 else if( !x264_is_regular_file( tcfile_in ) )
1495 {
1496- fprintf( stderr, "timecode [error]: tcfile input incompatible with non-regular file `%s'\n", psz_filename );
1497+ x264_cli_log( "timecode", X264_LOG_ERROR, "tcfile input incompatible with non-regular file `%s'\n", psz_filename );
1498 fclose( tcfile_in );
1499 return -1;
1500 }
1501@@ -466,8 +411,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1502 {
1503 if( h->pts )
1504 {
1505- fprintf( stderr, "timecode [info]: input timecode file missing data for frame %d and later\n"
1506- " assuming constant fps %.6f\n", i_frame, h->assume_fps );
1507+ x264_cli_log( "timecode", X264_LOG_INFO, "input timecode file missing data for frame %d and later\n"
1508+ " assuming constant fps %.6f\n", i_frame, h->assume_fps );
1509 free( h->pts );
1510 h->pts = NULL;
1511 }
1512diff --git a/input/y4m.c b/input/y4m.c
1513index fd42140..9b39d2f 100644
1514--- a/input/y4m.c
1515+++ b/input/y4m.c
1516@@ -21,7 +21,8 @@
1517 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1518 *****************************************************************************/
1519
1520-#include "muxers.h"
1521+#include "input.h"
1522+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "y4m", __VA_ARGS__ )
1523
1524 typedef struct
1525 {
1526@@ -162,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1527 if( colorspace == X264_CSP_NONE )
1528 colorspace = X264_CSP_I420;
1529
1530- if( colorspace != X264_CSP_I420 )
1531- {
1532- fprintf( stderr, "y4m [error]: colorspace unhandled\n" );
1533- return -1;
1534- }
1535+ FAIL_IF_ERROR( colorspace != X264_CSP_I420, "colorspace unhandled\n" )
1536
1537 *p_handle = h;
1538 return 0;
1539@@ -202,21 +199,13 @@ static int read_frame_internal( x264_picture_t *p_pic, y4m_hnd_t *h )
1540 return -1;
1541
1542 header[slen] = 0;
1543- if( strncmp( header, Y4M_FRAME_MAGIC, slen ) )
1544- {
1545- fprintf( stderr, "y4m [error]: bad header magic (%"PRIx32" <=> %s)\n",
1546- M32(header), header );
1547- return -1;
1548- }
1549+ FAIL_IF_ERROR( strncmp( header, Y4M_FRAME_MAGIC, slen ), "bad header magic (%"PRIx32" <=> %s)\n",
1550+ M32(header), header )
1551
1552 /* Skip most of it */
1553 while( i < MAX_FRAME_HEADER && fgetc( h->fh ) != '\n' )
1554 i++;
1555- if( i == MAX_FRAME_HEADER )
1556- {
1557- fprintf( stderr, "y4m [error]: bad frame header!\n" );
1558- return -1;
1559- }
1560+ FAIL_IF_ERROR( i == MAX_FRAME_HEADER, "bad frame header!\n" )
1561 h->frame_header_len = i+slen+1;
1562
1563 if( fread( p_pic->img.plane[0], h->width * h->height, 1, h->fh ) <= 0
1564diff --git a/input/yuv.c b/input/yuv.c
1565index cbed7fc..613662c 100644
1566--- a/input/yuv.c
1567+++ b/input/yuv.c
1568@@ -21,7 +21,7 @@
1569 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1570 *****************************************************************************/
1571
1572-#include "muxers.h"
1573+#include "input.h"
1574
1575 typedef struct
1576 {
1577@@ -45,11 +45,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1578 }
1579 else
1580 sscanf( opt->resolution, "%ux%u", &info->width, &info->height );
1581- if( !info->width || !info->height )
1582- {
1583- fprintf( stderr, "yuv [error]: rawyuv input requires a resolution.\n" );
1584- return -1;
1585- }
1586+ FAIL_IF_ERR( !info->width || !info->height, "yuv", "rawyuv input requires a resolution.\n" )
1587
1588 h->next_frame = 0;
1589 info->vfr = 0;
1590diff --git a/muxers.h b/muxers.h
1591deleted file mode 100644
1592index b309320..0000000
1593--- a/muxers.h
1594+++ /dev/null
1595@@ -1,61 +0,0 @@
1596-/*****************************************************************************
1597- * muxers.h: h264 file i/o modules
1598- *****************************************************************************
1599- * Copyright (C) 2003-2009 x264 project
1600- *
1601- * Authors: Laurent Aimar <fenrir@via.ecp.fr>
1602- * Loren Merritt <lorenm@u.washington.edu>
1603- *
1604- * This program is free software; you can redistribute it and/or modify
1605- * it under the terms of the GNU General Public License as published by
1606- * the Free Software Foundation; either version 2 of the License, or
1607- * (at your option) any later version.
1608- *
1609- * This program is distributed in the hope that it will be useful,
1610- * but WITHOUT ANY WARRANTY; without even the implied warranty of
1611- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1612- * GNU General Public License for more details.
1613- *
1614- * You should have received a copy of the GNU General Public License
1615- * along with this program; if not, write to the Free Software
1616- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1617- *****************************************************************************/
1618-
1619-#ifndef X264_MUXERS_H
1620-#define X264_MUXERS_H
1621-
1622-#include "common/common.h"
1623-#include "x264.h"
1624-
1625-typedef void *hnd_t;
1626-
1627-static inline int64_t gcd( int64_t a, int64_t b )
1628-{
1629- while( 1 )
1630- {
1631- int64_t c = a % b;
1632- if( !c )
1633- return b;
1634- a = b;
1635- b = c;
1636- }
1637-}
1638-
1639-static inline int64_t lcm( int64_t a, int64_t b )
1640-{
1641- return ( a / gcd( a, b ) ) * b;
1642-}
1643-
1644-static inline char *get_filename_extension( char *filename )
1645-{
1646- char *ext = filename + strlen( filename );
1647- while( *ext != '.' && ext > filename )
1648- ext--;
1649- ext += *ext == '.';
1650- return ext;
1651-}
1652-
1653-#include "input/input.h"
1654-#include "output/output.h"
1655-
1656-#endif
1657diff --git a/output/flv.c b/output/flv.c
1658index e441b6d..9831a5b 100644
1659--- a/output/flv.c
1660+++ b/output/flv.c
1661@@ -18,7 +18,7 @@
1662 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1663 *****************************************************************************/
1664
1665-#include "muxers.h"
1666+#include "output.h"
1667 #include "flv_bytestream.h"
1668
1669 #define CHECK(x)\
1670@@ -223,14 +223,14 @@ static int write_frame( hnd_t handle, uint8_t *p_nalu, int i_size, x264_picture_
1671 if( prev_dts == dts )
1672 {
1673 double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_dts - p_flv->i_prev_dts);
1674- fprintf( stderr, "flv [warning]: duplicate DTS %"PRId64" generated by rounding\n"
1675- " current internal decoding framerate: %.6f fps\n", dts, fps );
1676+ x264_cli_log( "flv", X264_LOG_WARNING, "duplicate DTS %"PRId64" generated by rounding\n"
1677+ " current internal decoding framerate: %.6f fps\n", dts, fps );
1678 }
1679 if( prev_cts == cts )
1680 {
1681 double fps = ((double)p_flv->i_timebase_den / p_flv->i_timebase_num) / (p_picture->i_pts - p_flv->i_prev_pts);
1682- fprintf( stderr, "flv [warning]: duplicate CTS %"PRId64" generated by rounding\n"
1683- " current internal composition framerate: %.6f fps\n", cts, fps );
1684+ x264_cli_log( "flv", X264_LOG_WARNING, "duplicate CTS %"PRId64" generated by rounding\n"
1685+ " current internal composition framerate: %.6f fps\n", cts, fps );
1686 }
1687 }
1688 p_flv->i_prev_dts = p_picture->i_dts;
1689diff --git a/output/flv_bytestream.c b/output/flv_bytestream.c
1690index 316114c..e02476c 100644
1691--- a/output/flv_bytestream.c
1692+++ b/output/flv_bytestream.c
1693@@ -18,7 +18,7 @@
1694 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1695 *****************************************************************************/
1696
1697-#include "muxers.h"
1698+#include "output.h"
1699 #include "flv_bytestream.h"
1700
1701 uint64_t dbl2int( double value )
1702diff --git a/output/matroska.c b/output/matroska.c
1703index 0304c84..a1219d0 100644
1704--- a/output/matroska.c
1705+++ b/output/matroska.c
1706@@ -18,7 +18,7 @@
1707 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1708 *****************************************************************************/
1709
1710-#include "muxers.h"
1711+#include "output.h"
1712 #include "matroska_ebml.h"
1713
1714 typedef struct
1715diff --git a/output/matroska_ebml.c b/output/matroska_ebml.c
1716index 31b62f8..adfcaa8 100644
1717--- a/output/matroska_ebml.c
1718+++ b/output/matroska_ebml.c
1719@@ -18,7 +18,7 @@
1720 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1721 *****************************************************************************/
1722
1723-#include "muxers.h"
1724+#include "output.h"
1725 #include "matroska_ebml.h"
1726
1727 #define CLSIZE 1048576
1728diff --git a/output/mp4.c b/output/mp4.c
1729index 0e3c2fc..f2ff5be 100644
1730--- a/output/mp4.c
1731+++ b/output/mp4.c
1732@@ -21,7 +21,7 @@
1733 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1734 *****************************************************************************/
1735
1736-#include "muxers.h"
1737+#include "output.h"
1738 #include <gpac/isomedia.h>
1739
1740 #if HAVE_GF_MALLOC
1741@@ -61,12 +61,12 @@ static void recompute_bitrate_mp4( GF_ISOFile *p_file, int i_track )
1742
1743 timescale = gf_isom_get_media_timescale( p_file, i_track );
1744 count = gf_isom_get_sample_count( p_file, i_track );
1745- for( int i = 0; i < count; i++ )
1746+ for( u32 i = 0; i < count; i++ )
1747 {
1748 GF_ISOSample *samp = gf_isom_get_sample_info( p_file, i_track, i+1, &di, &offset );
1749 if( !samp )
1750 {
1751- fprintf( stderr, "mp4 [error]: failure reading back frame %u\n", i );
1752+ x264_cli_log( "mp4", X264_LOG_ERROR, "failure reading back frame %u\n", i );
1753 break;
1754 }
1755
1756@@ -163,11 +163,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
1757 FILE *fh = fopen( psz_filename, "w" );
1758 if( !fh )
1759 return -1;
1760- else if( !x264_is_regular_file( fh ) )
1761- {
1762- fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
1763- return -1;
1764- }
1765+ FAIL_IF_ERR( !x264_is_regular_file( fh ), "mp4", "MP4 output is incompatible with non-regular file `%s'\n", psz_filename )
1766 fclose( fh );
1767
1768 if( !(p_mp4 = malloc( sizeof(mp4_hnd_t) )) )
1769diff --git a/output/output.h b/output/output.h
1770index c79b48e..094fefc 100644
1771--- a/output/output.h
1772+++ b/output/output.h
1773@@ -24,6 +24,8 @@
1774 #ifndef X264_OUTPUT_H
1775 #define X264_OUTPUT_H
1776
1777+#include "x264cli.h"
1778+
1779 typedef struct
1780 {
1781 int (*open_file)( char *psz_filename, hnd_t *p_handle );
1782diff --git a/output/raw.c b/output/raw.c
1783index 02e4c56..fc418fb 100644
1784--- a/output/raw.c
1785+++ b/output/raw.c
1786@@ -21,7 +21,7 @@
1787 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1788 *****************************************************************************/
1789
1790-#include "muxers.h"
1791+#include "output.h"
1792
1793 static int open_file( char *psz_filename, hnd_t *p_handle )
1794 {
1795diff --git a/x264.c b/x264.c
1796index f08ab41..741570c 100644
1797--- a/x264.c
1798+++ b/x264.c
1799@@ -31,9 +31,11 @@
1800 #include <getopt.h>
1801
1802 #include "common/common.h"
1803-#include "common/cpu.h"
1804-#include "x264.h"
1805-#include "muxers.h"
1806+#include "x264cli.h"
1807+#include "input/input.h"
1808+#include "output/output.h"
1809+
1810+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, "x264", __VA_ARGS__ )
1811
1812 #ifdef _WIN32
1813 #include <windows.h>
1814@@ -96,6 +98,7 @@ static const char * const muxer_names[] =
1815 };
1816
1817 static const char * const pulldown_names[] = { "none", "22", "32", "64", "double", "triple", "euro", 0 };
1818+static const char * const log_level_names[] = { "none", "error", "warning", "info", "debug", 0 };
1819
1820 typedef struct{
1821 int mod;
1822@@ -141,6 +144,48 @@ static void Help( x264_param_t *defaults, int longhelp );
1823 static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt );
1824 static int Encode( x264_param_t *param, cli_opt_t *opt );
1825
1826+/* logging and printing for within the cli system */
1827+static int cli_log_level;
1828+void x264_cli_log( const char *name, int i_level, const char *fmt, ... )
1829+{
1830+ if( i_level > cli_log_level )
1831+ return;
1832+ char *s_level;
1833+ switch( i_level )
1834+ {
1835+ case X264_LOG_ERROR:
1836+ s_level = "error";
1837+ break;
1838+ case X264_LOG_WARNING:
1839+ s_level = "warning";
1840+ break;
1841+ case X264_LOG_INFO:
1842+ s_level = "info";
1843+ break;
1844+ case X264_LOG_DEBUG:
1845+ s_level = "debug";
1846+ break;
1847+ default:
1848+ s_level = "unknown";
1849+ break;
1850+ }
1851+ fprintf( stderr, "%s [%s]: ", name, s_level );
1852+ va_list arg;
1853+ va_start( arg, fmt );
1854+ vfprintf( stderr, fmt, arg );
1855+ va_end( arg );
1856+}
1857+
1858+void x264_cli_printf( int i_level, const char *fmt, ... )
1859+{
1860+ if( i_level > cli_log_level )
1861+ return;
1862+ va_list arg;
1863+ va_start( arg, fmt );
1864+ vfprintf( stderr, fmt, arg );
1865+ va_end( arg );
1866+}
1867+
1868 /****************************************************************************
1869 * main:
1870 ****************************************************************************/
1871@@ -571,6 +616,9 @@ static void Help( x264_param_t *defaults, int longhelp )
1872 H1( " -v, --verbose Print stats for each frame\n" );
1873 H1( " --no-progress Don't show the progress indicator while encoding\n" );
1874 H0( " --quiet Quiet Mode\n" );
1875+ H1( " --log-level <string> Specify the maximum level of logging [\"%s\"]\n"
1876+ " - %s\n", strtable_lookup( log_level_names, cli_log_level - X264_LOG_NONE ),
1877+ stringify_names( buf, log_level_names ) );
1878 H1( " --psnr Enable PSNR computation\n" );
1879 H1( " --ssim Enable SSIM computation\n" );
1880 H1( " --threads <integer> Force a specific number of threads\n" );
1881@@ -616,6 +664,7 @@ enum {
1882 OPT_TCFILE_OUT,
1883 OPT_TIMEBASE,
1884 OPT_PULLDOWN,
1885+ OPT_LOG_LEVEL
1886 } OptionsOPT;
1887
1888 static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
1889@@ -729,6 +778,7 @@ static struct option long_options[] =
1890 { "ssim", no_argument, NULL, 0 },
1891 { "quiet", no_argument, NULL, OPT_QUIET },
1892 { "verbose", no_argument, NULL, 'v' },
1893+ { "log-level", required_argument, NULL, OPT_LOG_LEVEL },
1894 { "no-progress", no_argument, NULL, OPT_NOPROGRESS },
1895 { "visualize", no_argument, NULL, OPT_VISUALIZE },
1896 { "dump-yuv", required_argument, NULL, 0 },
1897@@ -780,11 +830,11 @@ static int select_output( const char *muxer, char *filename, x264_param_t *param
1898 param->b_repeat_headers = 0;
1899 if( param->i_nal_hrd == X264_NAL_HRD_CBR )
1900 {
1901- fprintf( stderr, "x264 [warning]: cbr nal-hrd is not compatible with mp4\n" );
1902+ x264_cli_log( "x264", X264_LOG_WARNING, "cbr nal-hrd is not compatible with mp4\n" );
1903 param->i_nal_hrd = X264_NAL_HRD_VBR;
1904 }
1905 #else
1906- fprintf( stderr, "x264 [error]: not compiled with MP4 output support\n" );
1907+ x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with MP4 output support\n" );
1908 return -1;
1909 #endif
1910 }
1911@@ -833,7 +883,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
1912 input = avs_input;
1913 module = "avs";
1914 #else
1915- fprintf( stderr, "x264 [error]: not compiled with AVS input support\n" );
1916+ x264_cli_log( "x264", X264_LOG_ERROR, "not compiled with AVS input support\n" );
1917 return -1;
1918 #endif
1919 }
1920@@ -877,11 +927,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
1921 input = yuv_input;
1922 }
1923
1924- if( !(*p_handle) )
1925- {
1926- fprintf( stderr, "x264 [error]: could not open input file `%s' via any method!\n", filename );
1927- return -1;
1928- }
1929+ FAIL_IF_ERROR( !(*p_handle), "could not open input file `%s' via any method!\n", filename )
1930 }
1931 strcpy( used_demuxer, module );
1932
1933@@ -932,6 +978,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1934 char *tune = NULL;
1935
1936 x264_param_default( &defaults );
1937+ cli_log_level = defaults.i_log_level;
1938
1939 memset( opt, 0, sizeof(cli_opt_t) );
1940 memset( &input_opt, 0, sizeof(cli_input_opt_t) );
1941@@ -1004,32 +1051,20 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1942 output_filename = optarg;
1943 break;
1944 case OPT_MUXER:
1945- if( parse_enum_name( optarg, muxer_names, &muxer ) < 0 )
1946- {
1947- fprintf( stderr, "x264 [error]: Unknown muxer `%s'\n", optarg );
1948- return -1;
1949- }
1950+ FAIL_IF_ERROR( parse_enum_name( optarg, muxer_names, &muxer ), "Unknown muxer `%s'\n", optarg )
1951 break;
1952 case OPT_DEMUXER:
1953- if( parse_enum_name( optarg, demuxer_names, &demuxer ) < 0 )
1954- {
1955- fprintf( stderr, "x264 [error]: Unknown demuxer `%s'\n", optarg );
1956- return -1;
1957- }
1958+ FAIL_IF_ERROR( parse_enum_name( optarg, demuxer_names, &demuxer ), "Unknown demuxer `%s'\n", optarg )
1959 break;
1960 case OPT_INDEX:
1961 input_opt.index_file = optarg;
1962 break;
1963 case OPT_QPFILE:
1964 opt->qpfile = fopen( optarg, "rb" );
1965- if( !opt->qpfile )
1966- {
1967- fprintf( stderr, "x264 [error]: can't open qpfile `%s'\n", optarg );
1968- return -1;
1969- }
1970- else if( !x264_is_regular_file( opt->qpfile ) )
1971+ FAIL_IF_ERROR( !opt->qpfile, "can't open qpfile `%s'\n", optarg )
1972+ if( !x264_is_regular_file( opt->qpfile ) )
1973 {
1974- fprintf( stderr, "x264 [error]: qpfile incompatible with non-regular file `%s'\n", optarg );
1975+ x264_cli_log( "x264", X264_LOG_ERROR, "qpfile incompatible with non-regular file `%s'\n", optarg );
1976 fclose( opt->qpfile );
1977 return -1;
1978 }
1979@@ -1038,11 +1073,17 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1980 b_thread_input = 1;
1981 break;
1982 case OPT_QUIET:
1983- param->i_log_level = X264_LOG_NONE;
1984+ cli_log_level = param->i_log_level = X264_LOG_NONE;
1985 break;
1986 case 'v':
1987- param->i_log_level = X264_LOG_DEBUG;
1988+ cli_log_level = param->i_log_level = X264_LOG_DEBUG;
1989 break;
1990+ case OPT_LOG_LEVEL:
1991+ if( !parse_enum_value( optarg, log_level_names, &cli_log_level ) )
1992+ cli_log_level += X264_LOG_NONE;
1993+ else
1994+ cli_log_level = atoi( optarg );
1995+ param->i_log_level = cli_log_level;
1996 case OPT_NOPROGRESS:
1997 opt->b_progress = 0;
1998 break;
1999@@ -1051,7 +1092,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
2000 param->b_visualize = 1;
2001 b_exit_on_ctrl_c = 1;
2002 #else
2003- fprintf( stderr, "x264 [warning]: not compiled with visualization support\n" );
2004+ x264_cli_log( "x264", X264_LOG_WARNING, "not compiled with visualization support\n" );
2005 #endif
2006 break;
2007 case OPT_TUNE:
2008@@ -1078,18 +1119,13 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
2009 break;
2010 case OPT_TCFILE_OUT:
2011 opt->tcfile_out = fopen( optarg, "wb" );
2012- if( !opt->tcfile_out )
2013- {
2014- fprintf( stderr, "x264 [error]: can't open `%s'\n", optarg );
2015- return -1;
2016- }
2017+ FAIL_IF_ERROR( !opt->tcfile_out, "can't open `%s'\n", optarg )
2018 break;
2019 case OPT_TIMEBASE:
2020 input_opt.timebase = optarg;
2021 break;
2022 case OPT_PULLDOWN:
2023- if( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ) < 0 )
2024- return -1;
2025+ FAIL_IF_ERROR( parse_enum_value( optarg, pulldown_names, &opt->i_pulldown ), "Unknown pulldown `%s'\n", optarg )
2026 break;
2027 default:
2028 generic_option:
2029@@ -1116,7 +1152,7 @@ generic_option:
2030 if( b_error )
2031 {
2032 const char *name = long_options_index > 0 ? long_options[long_options_index].name : argv[optind-2];
2033- fprintf( stderr, "x264 [error]: invalid argument: %s = %s\n", name, optarg );
2034+ x264_cli_log( "x264", X264_LOG_ERROR, "invalid argument: %s = %s\n", name, optarg );
2035 return -1;
2036 }
2037 }
2038@@ -1130,20 +1166,12 @@ generic_option:
2039 return -1;
2040
2041 /* Get the file name */
2042- if( optind > argc - 1 || !output_filename )
2043- {
2044- fprintf( stderr, "x264 [error]: No %s file. Run x264 --help for a list of options.\n",
2045- optind > argc - 1 ? "input" : "output" );
2046- return -1;
2047- }
2048+ FAIL_IF_ERROR( optind > argc - 1 || !output_filename, "No %s file. Run x264 --help for a list of options.\n",
2049+ optind > argc - 1 ? "input" : "output" )
2050
2051 if( select_output( muxer, output_filename, param ) )
2052 return -1;
2053- if( output.open_file( output_filename, &opt->hout ) )
2054- {
2055- fprintf( stderr, "x264 [error]: could not open output file `%s'\n", output_filename );
2056- return -1;
2057- }
2058+ FAIL_IF_ERROR( output.open_file( output_filename, &opt->hout ), "could not open output file `%s'\n", output_filename )
2059
2060 input_filename = argv[optind++];
2061 input_opt.resolution = optind < argc ? argv[optind++] : NULL;
2062@@ -1163,39 +1191,22 @@ generic_option:
2063 if( select_input( demuxer, demuxername, input_filename, &opt->hin, &info, &input_opt ) )
2064 return -1;
2065
2066- if( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ) )
2067- {
2068- fprintf( stderr, "x264 [error]: could not open input file `%s'\n", input_filename );
2069- return -1;
2070- }
2071+ FAIL_IF_ERROR( !opt->hin && input.open_file( input_filename, &opt->hin, &info, &input_opt ),
2072+ "could not open input file `%s'\n", input_filename )
2073
2074 x264_reduce_fraction( &info.sar_width, &info.sar_height );
2075 x264_reduce_fraction( &info.fps_num, &info.fps_den );
2076- if( param->i_log_level >= X264_LOG_INFO )
2077- fprintf( stderr, "%s [info]: %dx%d%c %d:%d @ %d/%d fps (%cfr)\n", demuxername, info.width,
2078- info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
2079- info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
2080+ x264_cli_log( demuxername, X264_LOG_INFO, "%dx%d%c %d:%d @ %d/%d fps (%cfr)\n", info.width,
2081+ info.height, info.interlaced ? 'i' : 'p', info.sar_width, info.sar_height,
2082+ info.fps_num, info.fps_den, info.vfr ? 'v' : 'c' );
2083
2084 if( tcfile_name )
2085 {
2086- if( b_user_fps )
2087- {
2088- fprintf( stderr, "x264 [error]: --fps + --tcfile-in is incompatible.\n" );
2089- return -1;
2090- }
2091- if( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ) )
2092- {
2093- fprintf( stderr, "x264 [error]: timecode input failed\n" );
2094- return -1;
2095- }
2096- else
2097- input = timecode_input;
2098- }
2099- else if( !info.vfr && input_opt.timebase )
2100- {
2101- fprintf( stderr, "x264 [error]: --timebase is incompatible with cfr input\n" );
2102- return -1;
2103+ FAIL_IF_ERROR( b_user_fps, "--fps + --tcfile-in is incompatible.\n" )
2104+ FAIL_IF_ERROR( timecode_input.open_file( tcfile_name, &opt->hin, &info, &input_opt ), "timecode input failed\n" )
2105+ input = timecode_input;
2106 }
2107+ else FAIL_IF_ERROR( !info.vfr && input_opt.timebase, "--timebase is incompatible with cfr input\n" )
2108
2109 /* set param flags from the info flags as necessary */
2110 param->i_csp = info.csp;
2111@@ -1204,9 +1215,9 @@ generic_option:
2112 param->i_width = info.width;
2113 if( !b_user_interlaced && info.interlaced )
2114 {
2115- fprintf( stderr, "x264 [warning]: input appears to be interlaced, enabling %cff interlaced mode.\n"
2116- " If you want otherwise, use --no-interlaced or --%cff\n",
2117- info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
2118+ x264_cli_log( "x264", X264_LOG_WARNING, "input appears to be interlaced, enabling %cff interlaced mode.\n"
2119+ " If you want otherwise, use --no-interlaced or --%cff\n",
2120+ info.tff ? 't' : 'b', info.tff ? 'b' : 't' );
2121 param->b_interlaced = 1;
2122 param->b_tff = !!info.tff;
2123 }
2124@@ -1230,21 +1241,14 @@ generic_option:
2125 uint64_t i_user_timebase_num;
2126 uint64_t i_user_timebase_den;
2127 int ret = sscanf( input_opt.timebase, "%"SCNu64"/%"SCNu64, &i_user_timebase_num, &i_user_timebase_den );
2128- if( !ret )
2129- {
2130- fprintf( stderr, "x264 [error]: invalid argument: timebase = %s\n", input_opt.timebase );
2131- return -1;
2132- }
2133+ FAIL_IF_ERROR( !ret, "invalid argument: timebase = %s\n", input_opt.timebase )
2134 else if( ret == 1 )
2135 {
2136 i_user_timebase_num = param->i_timebase_num;
2137 i_user_timebase_den = strtoul( input_opt.timebase, NULL, 10 );
2138 }
2139- if( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX )
2140- {
2141- fprintf( stderr, "x264 [error]: timebase you specified exceeds H.264 maximum\n" );
2142- return -1;
2143- }
2144+ FAIL_IF_ERROR( i_user_timebase_num > UINT32_MAX || i_user_timebase_den > UINT32_MAX,
2145+ "timebase you specified exceeds H.264 maximum\n" )
2146 opt->timebase_convert_multiplier = ((double)i_user_timebase_den / param->i_timebase_den)
2147 * ((double)param->i_timebase_num / i_user_timebase_num);
2148 param->i_timebase_num = i_user_timebase_num;
2149@@ -1261,13 +1265,8 @@ generic_option:
2150 if( b_thread_input || param->i_threads > 1
2151 || (param->i_threads == X264_THREADS_AUTO && x264_cpu_num_processors() > 1) )
2152 {
2153- if( thread_input.open_file( NULL, &opt->hin, &info, NULL ) )
2154- {
2155- fprintf( stderr, "x264 [error]: threaded input failed\n" );
2156- return -1;
2157- }
2158- else
2159- input = thread_input;
2160+ FAIL_IF_ERROR( thread_input.open_file( NULL, &opt->hin, &info, NULL ), "threaded input failed\n" )
2161+ input = thread_input;
2162 }
2163 #endif
2164
2165@@ -1321,7 +1320,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
2166 else ret = 0;
2167 if( ret != 3 || qp < -1 || qp > 51 )
2168 {
2169- fprintf( stderr, "x264 [error]: can't parse qpfile for frame %d\n", i_frame );
2170+ x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
2171 fclose( opt->qpfile );
2172 opt->qpfile = NULL;
2173 pic->i_type = X264_TYPE_AUTO;
2174@@ -1344,11 +1343,7 @@ static int Encode_frame( x264_t *h, hnd_t hout, x264_picture_t *pic, int64_t *l
2175
2176 i_frame_size = x264_encoder_encode( h, &nal, &i_nal, pic, &pic_out );
2177
2178- if( i_frame_size < 0 )
2179- {
2180- fprintf( stderr, "x264 [error]: x264_encoder_encode failed\n" );
2181- return -1;
2182- }
2183+ FAIL_IF_ERROR( i_frame_size < 0, "x264_encoder_encode failed\n" );
2184
2185 if( i_frame_size )
2186 {
2187@@ -1424,17 +1419,14 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
2188 param->b_pic_struct = 1;
2189 pulldown = &pulldown_values[opt->i_pulldown];
2190 param->i_timebase_num = param->i_fps_den;
2191- if( fmod( param->i_fps_num * pulldown->fps_factor, 1 ) )
2192- {
2193- fprintf( stderr, "x264 [error]: unsupported framerate for chosen pulldown\n" );
2194- return -1;
2195- }
2196+ FAIL_IF_ERROR( fmod( param->i_fps_num * pulldown->fps_factor, 1 ),
2197+ "unsupported framerate for chosen pulldown\n" )
2198 param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
2199 }
2200
2201 if( ( h = x264_encoder_open( param ) ) == NULL )
2202 {
2203- fprintf( stderr, "x264 [error]: x264_encoder_open failed\n" );
2204+ x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
2205 input.close_file( opt->hin );
2206 return -1;
2207 }
2208@@ -1445,27 +1437,19 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
2209
2210 if( output.set_param( opt->hout, param ) )
2211 {
2212- fprintf( stderr, "x264 [error]: can't set outfile param\n" );
2213+ x264_cli_log( "x264", X264_LOG_ERROR, "can't set outfile param\n" );
2214 input.close_file( opt->hin );
2215 output.close_file( opt->hout, largest_pts, second_largest_pts );
2216 return -1;
2217 }
2218
2219 /* Create a new pic */
2220- if( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ) )
2221- {
2222- fprintf( stderr, "x264 [error]: malloc failed\n" );
2223- return -1;
2224- }
2225+ FAIL_IF_ERROR( input.picture_alloc( &pic, param->i_csp, param->i_width, param->i_height ), "malloc failed\n" )
2226
2227 i_start = x264_mdate();
2228 /* ticks/frame = ticks/second / frames/second */
2229 ticks_per_frame = (int64_t)param->i_timebase_den * param->i_fps_den / param->i_timebase_num / param->i_fps_num;
2230- if( ticks_per_frame < 1 )
2231- {
2232- fprintf( stderr, "x264 [error]: ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame );
2233- return -1;
2234- }
2235+ FAIL_IF_ERROR( ticks_per_frame < 1, "ticks_per_frame invalid: %"PRId64"\n", ticks_per_frame )
2236
2237 if( !param->b_repeat_headers )
2238 {
2239@@ -1473,12 +1457,7 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
2240 x264_nal_t *headers;
2241 int i_nal;
2242
2243- if( x264_encoder_headers( h, &headers, &i_nal ) < 0 )
2244- {
2245- fprintf( stderr, "x264 [error]: x264_encoder_headers failed\n" );
2246- return -1;
2247- }
2248-
2249+ FAIL_IF_ERROR( x264_encoder_headers( h, &headers, &i_nal ) < 0, "x264_encoder_headers failed\n" )
2250 if( (i_file = output.write_headers( opt->hout, headers )) < 0 )
2251 return -1;
2252 }
2253@@ -1508,15 +1487,12 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
2254
2255 if( pic.i_pts <= largest_pts )
2256 {
2257- if( param->i_log_level >= X264_LOG_WARNING )
2258- {
2259- if( param->i_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
2260- fprintf( stderr, "x264 [warning]: non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
2261+ if( cli_log_level >= X264_LOG_DEBUG || pts_warning_cnt < MAX_PTS_WARNING )
2262+ x264_cli_log( "x264", X264_LOG_WARNING, "non-strictly-monotonic pts at frame %d (%"PRId64" <= %"PRId64")\n",
2263 i_frame, output_pts, largest_pts * dts_compress_multiplier );
2264- else if( pts_warning_cnt == MAX_PTS_WARNING )
2265- fprintf( stderr, "x264 [warning]: too many nonmonotonic pts warnings, suppressing further ones\n" );
2266- pts_warning_cnt++;
2267- }
2268+ else if( pts_warning_cnt == MAX_PTS_WARNING )
2269+ x264_cli_log( "x264", X264_LOG_WARNING, "too many nonmonotonic pts warnings, suppressing further ones\n" );
2270+ pts_warning_cnt++;
2271 pic.i_pts = largest_pts + ticks_per_frame;
2272 output_pts = pic.i_pts * dts_compress_multiplier;
2273 }
2274@@ -1573,8 +1549,8 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
2275 if( opt->b_progress && i_frame_output % i_update_interval == 0 && i_frame_output )
2276 Print_status( i_start, i_frame_output, i_frame_total, i_file, param, 2 * last_dts - prev_dts - first_dts );
2277 }
2278- if( pts_warning_cnt >= MAX_PTS_WARNING && param->i_log_level < X264_LOG_DEBUG )
2279- fprintf( stderr, "x264 [warning]: %d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
2280+ if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
2281+ x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
2282
2283 /* duration algorithm fails when only 1 frame is output */
2284 if( i_frame_output == 1 )
2285diff --git a/x264cli.h b/x264cli.h
2286new file mode 100644
2287index 0000000..1acca56
2288--- /dev/null
2289+++ b/x264cli.h
2290@@ -0,0 +1,67 @@
2291+/*****************************************************************************
2292+ * x264cli.h: x264cli common
2293+ *****************************************************************************
2294+ * Copyright (C) 2003-2010 x264 project
2295+ *
2296+ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
2297+ * Loren Merritt <lorenm@u.washington.edu>
2298+ *
2299+ * This program is free software; you can redistribute it and/or modify
2300+ * it under the terms of the GNU General Public License as published by
2301+ * the Free Software Foundation; either version 2 of the License, or
2302+ * (at your option) any later version.
2303+ *
2304+ * This program is distributed in the hope that it will be useful,
2305+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2306+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2307+ * GNU General Public License for more details.
2308+ *
2309+ * You should have received a copy of the GNU General Public License
2310+ * along with this program; if not, write to the Free Software
2311+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
2312+ *****************************************************************************/
2313+
2314+#ifndef X264_CLI_H
2315+#define X264_CLI_H
2316+
2317+#include "common/common.h"
2318+
2319+typedef void *hnd_t;
2320+
2321+static inline int64_t gcd( int64_t a, int64_t b )
2322+{
2323+ while( 1 )
2324+ {
2325+ int64_t c = a % b;
2326+ if( !c )
2327+ return b;
2328+ a = b;
2329+ b = c;
2330+ }
2331+}
2332+
2333+static inline int64_t lcm( int64_t a, int64_t b )
2334+{
2335+ return ( a / gcd( a, b ) ) * b;
2336+}
2337+
2338+static inline char *get_filename_extension( char *filename )
2339+{
2340+ char *ext = filename + strlen( filename );
2341+ while( *ext != '.' && ext > filename )
2342+ ext--;
2343+ ext += *ext == '.';
2344+ return ext;
2345+}
2346+
2347+void x264_cli_log( const char *name, int i_level, const char *fmt, ... );
2348+void x264_cli_printf( int i_level, const char *fmt, ... );
2349+
2350+#define FAIL_IF_ERR( cond, name, ... )\
2351+if( cond )\
2352+{\
2353+ x264_cli_log( name, X264_LOG_ERROR, __VA_ARGS__ );\
2354+ return -1;\
2355+}
2356+
2357+#endif
2358--
23591.7.1
2360
2361
2362From f378994ab3c816aaab2b795143e31919fdee1f2d Mon Sep 17 00:00:00 2001
2363From: Jason Garrett-Glaser <darkshikari@gmail.com>
2364Date: Wed, 30 Jun 2010 13:06:22 -0700
2365Subject: [PATCH 5/7] Don't check i16x16 planar mode unless previous modes were useful
2366 Saves ~160 clocks per MB at subme=1, ~270 per MB at subme>1 (measured on Core i7).
2367 Negligible effect on compression.
2368
2369Also make a few more arrays static.
2370---
2371 encoder/analyse.c | 29 +++++++++++++++++++----------
2372 encoder/set.c | 3 ++-
2373 2 files changed, 21 insertions(+), 11 deletions(-)
2374
2375diff --git a/encoder/analyse.c b/encoder/analyse.c
2376index 696c78f..cdbdd1e 100644
2377--- a/encoder/analyse.c
2378+++ b/encoder/analyse.c
2379@@ -646,16 +646,27 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
2380 /* 16x16 prediction selection */
2381 const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra );
2382
2383+ /* Not heavily tuned */
2384+ static const uint8_t i16x16_thresh_lut[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
2385+ int i16x16_thresh = a->b_fast_intra ? (i16x16_thresh_lut[h->mb.i_subpel_refine]*i_satd_inter)>>1 : COST_MAX;
2386+
2387 if( !h->mb.b_lossless && predict_mode[3] >= 0 )
2388 {
2389 h->pixf.intra_mbcmp_x3_16x16( p_src, p_dst, a->i_satd_i16x16_dir );
2390- h->predict_16x16[I_PRED_16x16_P]( p_dst );
2391- a->i_satd_i16x16_dir[I_PRED_16x16_P] =
2392- h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
2393- for( int i = 0; i < 4; i++ )
2394+ a->i_satd_i16x16_dir[0] += lambda * bs_size_ue(0);
2395+ a->i_satd_i16x16_dir[1] += lambda * bs_size_ue(1);
2396+ a->i_satd_i16x16_dir[2] += lambda * bs_size_ue(2);
2397+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[0], a->i_predict16x16, 0 );
2398+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[1], a->i_predict16x16, 1 );
2399+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[2], a->i_predict16x16, 2 );
2400+
2401+ /* Plane is expensive, so don't check it unless one of the previous modes was useful. */
2402+ if( a->i_satd_i16x16 <= i16x16_thresh )
2403 {
2404- int cost = a->i_satd_i16x16_dir[i] += lambda * bs_size_ue(i);
2405- COPY2_IF_LT( a->i_satd_i16x16, cost, a->i_predict16x16, i );
2406+ h->predict_16x16[I_PRED_16x16_P]( p_dst );
2407+ a->i_satd_i16x16_dir[I_PRED_16x16_P] = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE );
2408+ a->i_satd_i16x16_dir[I_PRED_16x16_P] += lambda * bs_size_ue(3);
2409+ COPY2_IF_LT( a->i_satd_i16x16, a->i_satd_i16x16_dir[I_PRED_16x16_P], a->i_predict16x16, 3 );
2410 }
2411 }
2412 else
2413@@ -681,9 +692,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
2414 /* cavlc mb type prefix */
2415 a->i_satd_i16x16 += lambda * i_mb_b_cost_table[I_16x16];
2416
2417- /* Not heavily tuned */
2418- const uint8_t i16x16_thresh[11] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4 };
2419- if( a->b_fast_intra && a->i_satd_i16x16 > (i16x16_thresh[h->mb.i_subpel_refine]*i_satd_inter)>>1 )
2420+ if( a->i_satd_i16x16 > i16x16_thresh )
2421 return;
2422
2423 /* 8x8 prediction selection */
2424@@ -784,7 +793,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
2425 i_cost = (i_cost * cost_div_fix8[idx]) >> 8;
2426 }
2427 /* Not heavily tuned */
2428- const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
2429+ static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 };
2430 if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 )
2431 return;
2432 }
2433diff --git a/encoder/set.c b/encoder/set.c
2434index 8d007aa..8ea6eac 100644
2435--- a/encoder/set.c
2436+++ b/encoder/set.c
2437@@ -534,7 +534,8 @@ int x264_sei_version_write( x264_t *h, bs_t *s )
2438 {
2439 int i;
2440 // random ID number generated according to ISO-11578
2441- const uint8_t uuid[16] = {
2442+ static const uint8_t uuid[16] =
2443+ {
2444 0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
2445 0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
2446 };
2447--
24481.7.1
2449
2450
2451From 293b47bc2c52abe5143913ef3d126e6170edaf5c Mon Sep 17 00:00:00 2001
2452From: Jason Garrett-Glaser <darkshikari@gmail.com>
2453Date: Wed, 30 Jun 2010 13:55:46 -0700
2454Subject: [PATCH 6/7] Support infinite keyint (--keyint infinite).
2455 This just means x264 won't insert non-scenecut keyframes.
2456 Useful for streaming when using interactive error recovery or some other mechanism that makes keyframes unnecessary.
2457
2458Also change POC logic to limit POC/framenum LSB size (to save bits per slice).
2459Also fix a bug in the CPB underflow detection code (didn't affect the bitstream, just resulted in the failure to print certain warning messages).
2460---
2461 common/common.c | 7 ++++---
2462 encoder/encoder.c | 8 ++++----
2463 encoder/ratecontrol.c | 10 +++++-----
2464 encoder/set.c | 29 +++++++++++++++++------------
2465 encoder/slicetype.c | 2 +-
2466 x264.c | 2 +-
2467 x264.h | 3 ++-
2468 7 files changed, 34 insertions(+), 27 deletions(-)
2469
2470diff --git a/common/common.c b/common/common.c
2471index 8c7cf3c..14dd716 100644
2472--- a/common/common.c
2473+++ b/common/common.c
2474@@ -638,9 +638,10 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
2475 p->i_dpb_size = atoi(value);
2476 OPT("keyint")
2477 {
2478- p->i_keyint_max = atoi(value);
2479- if( p->i_keyint_min > p->i_keyint_max )
2480- p->i_keyint_min = p->i_keyint_max;
2481+ if( strstr( value, "infinite" ) )
2482+ p->i_keyint_max = X264_KEYINT_MAX_INFINITE;
2483+ else
2484+ p->i_keyint_max = atoi(value);
2485 }
2486 OPT2("min-keyint", "keyint-min")
2487 {
2488diff --git a/encoder/encoder.c b/encoder/encoder.c
2489index 5cd3307..31cb84a 100644
2490--- a/encoder/encoder.c
2491+++ b/encoder/encoder.c
2492@@ -567,8 +567,7 @@ static int x264_validate_parameters( x264_t *h )
2493
2494 h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
2495 h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
2496- if( h->param.i_keyint_max <= 0 )
2497- h->param.i_keyint_max = 1;
2498+ h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
2499 if( h->param.i_scenecut_threshold < 0 )
2500 h->param.i_scenecut_threshold = 0;
2501 if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
2502@@ -627,9 +626,10 @@ static int x264_validate_parameters( x264_t *h )
2503 h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
2504 if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
2505 h->param.rc.b_mb_tree = 0;
2506- if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
2507+ if( (!h->param.b_intra_refresh && h->param.i_keyint_max != X264_KEYINT_MAX_INFINITE) &&
2508+ !h->param.rc.i_lookahead && h->param.rc.b_mb_tree )
2509 {
2510- x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
2511+ x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh or infinite keyint\n" );
2512 h->param.rc.b_mb_tree = 0;
2513 }
2514 if( h->param.rc.b_stat_read )
2515diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
2516index 1030ef2..6fdaa98 100644
2517--- a/encoder/ratecontrol.c
2518+++ b/encoder/ratecontrol.c
2519@@ -492,13 +492,13 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
2520 // arbitrary
2521 #define MAX_DURATION 0.5
2522
2523- int max_cpb_output_delay = h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
2524+ int max_cpb_output_delay = X264_MIN( h->param.i_keyint_max * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick, INT_MAX );
2525 int max_dpb_output_delay = h->sps->vui.i_max_dec_frame_buffering * MAX_DURATION * h->sps->vui.i_time_scale / h->sps->vui.i_num_units_in_tick;
2526 int max_delay = (int)(90000.0 * (double)h->sps->vui.hrd.i_cpb_size_unscaled / h->sps->vui.hrd.i_bit_rate_unscaled + 0.5);
2527
2528 h->sps->vui.hrd.i_initial_cpb_removal_delay_length = 2 + x264_clip3( 32 - x264_clz( max_delay ), 4, 22 );
2529- h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 32 );
2530- h->sps->vui.hrd.i_dpb_output_delay_length = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 32 );
2531+ h->sps->vui.hrd.i_cpb_removal_delay_length = x264_clip3( 32 - x264_clz( max_cpb_output_delay ), 4, 31 );
2532+ h->sps->vui.hrd.i_dpb_output_delay_length = x264_clip3( 32 - x264_clz( max_dpb_output_delay ), 4, 31 );
2533
2534 #undef MAX_DURATION
2535
2536@@ -1781,10 +1781,10 @@ void x264_hrd_fullness( x264_t *h )
2537 uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
2538 uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
2539
2540- if( cpb_state < 0 || cpb_state > cpb_size )
2541+ if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > cpb_size )
2542 {
2543 x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
2544- cpb_state < 0 ? "underflow" : "overflow", (float)cpb_state/denom, (float)cpb_size/denom );
2545+ rct->buffer_fill_final < 0 ? "underflow" : "overflow", (float)rct->buffer_fill_final/denom, (float)cpb_size/denom );
2546 }
2547
2548 h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom);
2549diff --git a/encoder/set.c b/encoder/set.c
2550index 8ea6eac..9e6e736 100644
2551--- a/encoder/set.c
2552+++ b/encoder/set.c
2553@@ -99,6 +99,7 @@ static void x264_sei_write( bs_t *s, uint8_t *p_start )
2554 void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
2555 {
2556 sps->i_id = i_id;
2557+ int max_frame_num;
2558
2559 sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
2560 if( sps->b_qpprime_y_zero_transform_bypass )
2561@@ -118,15 +119,27 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
2562 /* Never set constraint_set2, it is not necessary and not used in real world. */
2563 sps->b_constraint_set2 = 0;
2564
2565- sps->i_log2_max_frame_num = 4; /* at least 4 */
2566- while( (1 << sps->i_log2_max_frame_num) <= param->i_keyint_max && sps->i_log2_max_frame_num < 10 )
2567+ sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
2568+ /* extra slot with pyramid so that we don't have to override the
2569+ * order of forgetting old pictures */
2570+ sps->vui.i_max_dec_frame_buffering =
2571+ sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
2572+ param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
2573+ sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
2574+
2575+ /* number of refs + current frame */
2576+ max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
2577+ sps->i_log2_max_frame_num = 4;
2578+ while( (1 << sps->i_log2_max_frame_num) <= max_frame_num )
2579 sps->i_log2_max_frame_num++;
2580- sps->i_log2_max_frame_num++;
2581
2582 sps->i_poc_type = 0;
2583 if( sps->i_poc_type == 0 )
2584 {
2585- sps->i_log2_max_poc_lsb = sps->i_log2_max_frame_num + 1; /* max poc = 2*frame_num */
2586+ int max_delta_poc = (param->i_bframe + 2) * (!!param->i_bframe_pyramid + 1) * 2;
2587+ sps->i_log2_max_poc_lsb = 4;
2588+ while( (1 << sps->i_log2_max_poc_lsb) <= max_delta_poc * 2 )
2589+ sps->i_log2_max_poc_lsb++;
2590 }
2591 else if( sps->i_poc_type == 1 )
2592 {
2593@@ -219,14 +232,6 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
2594
2595 // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable
2596
2597- sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
2598- /* extra slot with pyramid so that we don't have to override the
2599- * order of forgetting old pictures */
2600- sps->vui.i_max_dec_frame_buffering =
2601- sps->i_num_ref_frames = X264_MIN(16, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
2602- param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
2603- sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
2604-
2605 sps->vui.b_bitstream_restriction = 1;
2606 if( sps->vui.b_bitstream_restriction )
2607 {
2608diff --git a/encoder/slicetype.c b/encoder/slicetype.c
2609index 4ede8cf..7d69b71 100644
2610--- a/encoder/slicetype.c
2611+++ b/encoder/slicetype.c
2612@@ -1009,7 +1009,7 @@ static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **f
2613 float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
2614 /* magic numbers pulled out of thin air */
2615 float f_thresh_min = f_thresh_max * h->param.i_keyint_min
2616- / ( h->param.i_keyint_max * 4 );
2617+ / ( h->param.i_keyint_max * 4. );
2618 int res;
2619
2620 if( h->param.i_keyint_min == h->param.i_keyint_max )
2621diff --git a/x264.c b/x264.c
2622index 741570c..0bede93 100644
2623--- a/x264.c
2624+++ b/x264.c
2625@@ -409,7 +409,7 @@ static void Help( x264_param_t *defaults, int longhelp )
2626 H0( "\n" );
2627 H0( "Frame-type options:\n" );
2628 H0( "\n" );
2629- H0( " -I, --keyint <integer> Maximum GOP size [%d]\n", defaults->i_keyint_max );
2630+ H0( " -I, --keyint <integer or \"infinite\"> Maximum GOP size [%d]\n", defaults->i_keyint_max );
2631 H2( " -i, --min-keyint <integer> Minimum GOP size [auto]\n" );
2632 H2( " --no-scenecut Disable adaptive I-frame decision\n" );
2633 H2( " --scenecut <integer> How aggressively to insert extra I-frames [%d]\n", defaults->i_scenecut_threshold );
2634diff --git a/x264.h b/x264.h
2635index 86f7426..097365a 100644
2636--- a/x264.h
2637+++ b/x264.h
2638@@ -35,7 +35,7 @@
2639
2640 #include <stdarg.h>
2641
2642-#define X264_BUILD 101
2643+#define X264_BUILD 102
2644
2645 /* x264_t:
2646 * opaque handler for encoder */
2647@@ -152,6 +152,7 @@ typedef struct
2648 #define X264_B_PYRAMID_STRICT 1
2649 #define X264_B_PYRAMID_NORMAL 2
2650 #define X264_KEYINT_MIN_AUTO 0
2651+#define X264_KEYINT_MAX_INFINITE (1<<30)
2652 #define X264_OPEN_GOP_NONE 0
2653 #define X264_OPEN_GOP_NORMAL 1
2654 #define X264_OPEN_GOP_BLURAY 2
2655--
26561.7.1
2657
2658
2659From bcc80383b97693dc78f4ac545baa60a3b43e2ff0 Mon Sep 17 00:00:00 2001
2660From: Oskar Arvidsson <oskar@irock.se>
2661Date: Fri, 2 Jul 2010 04:06:08 +0200
2662Subject: [PATCH 7/7] Support for 9 and 10-bit encoding
2663 Output bit depth is specified at compile time via --bit-depth.
2664 There is currently almost no assembly code available for high-bit-depth modes, so encoding will be very slow.
2665 Input is still 8-bit only; this will change in the future.
2666
2667Note that very few H.264 decoders currently support bit depths greater than 8.
2668---
2669 common/arm/mc-c.c | 42 +++++++-----
2670 common/arm/predict-c.c | 8 ++
2671 common/bitstream.h | 2 +-
2672 common/common.c | 17 ++++-
2673 common/common.h | 39 +++++++----
2674 common/dct.c | 15 +++-
2675 common/deblock.c | 27 +++++---
2676 common/macroblock.c | 2 +-
2677 common/macroblock.h | 66 ++++++++++++-------
2678 common/mc.c | 33 ++++-----
2679 common/mc.h | 2 +-
2680 common/pixel.c | 14 +++-
2681 common/ppc/dct.c | 2 +
2682 common/ppc/deblock.c | 2 +
2683 common/ppc/mc.c | 4 +
2684 common/ppc/pixel.c | 4 +
2685 common/ppc/predict.c | 6 ++
2686 common/ppc/quant.c | 2 +
2687 common/predict.c | 63 ++++++++++--------
2688 common/quant.c | 14 ++---
2689 common/set.c | 25 ++++++-
2690 common/x86/mc-c.c | 12 +++-
2691 common/x86/predict-c.c | 10 +++
2692 configure | 17 +++++
2693 encoder/analyse.c | 80 ++++++++++++-----------
2694 encoder/cabac.c | 25 ++++---
2695 encoder/cavlc.c | 24 +++----
2696 encoder/encoder.c | 45 +++++++++----
2697 encoder/macroblock.h | 4 +-
2698 encoder/me.h | 2 +-
2699 encoder/ratecontrol.c | 20 +++---
2700 encoder/rdo.c | 10 +--
2701 encoder/set.c | 8 ++-
2702 encoder/slicetype.c | 10 ++--
2703 tools/checkasm.c | 169 +++++++++++++++++++++++++----------------------
2704 x264.c | 24 ++++---
2705 x264.h | 4 +-
2706 37 files changed, 517 insertions(+), 336 deletions(-)
2707
2708diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
2709index d294eff..b1106dd 100644
2710--- a/common/arm/mc-c.c
2711+++ b/common/arm/mc-c.c
2712@@ -64,6 +64,19 @@ MC_WEIGHT(_nodenom)
2713 MC_WEIGHT(_offsetadd)
2714 MC_WEIGHT(_offsetsub)
2715
2716+void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
2717+void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
2718+void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
2719+void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
2720+
2721+void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
2722+void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
2723+
2724+void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
2725+void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
2726+void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
2727+
2728+#if !X264_HIGH_BIT_DEPTH
2729 static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
2730 {
2731 if( w->i_scale == 1<<w->i_denom )
2732@@ -85,14 +98,6 @@ static void x264_weight_cache_neon( x264_t *h, x264_weight_t *w )
2733 w->weightfn = x264_mc_wtab_neon;
2734 }
2735
2736-void x264_mc_copy_w4_neon( uint8_t *, int, uint8_t *, int, int );
2737-void x264_mc_copy_w8_neon( uint8_t *, int, uint8_t *, int, int );
2738-void x264_mc_copy_w16_neon( uint8_t *, int, uint8_t *, int, int );
2739-void x264_mc_copy_w16_aligned_neon( uint8_t *, int, uint8_t *, int, int );
2740-
2741-void x264_mc_chroma_neon( uint8_t *, int, uint8_t *, int, int, int, int, int );
2742-void x264_frame_init_lowres_core_neon( uint8_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *, int, int, int, int);
2743-
2744 static void (* const x264_pixel_avg_wtab_neon[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
2745 {
2746 NULL,
2747@@ -174,10 +179,6 @@ static uint8_t *get_ref_neon( uint8_t *dst, int *i_dst_stride,
2748 }
2749 }
2750
2751-void x264_hpel_filter_v_neon( uint8_t *, uint8_t *, int16_t *, int, int );
2752-void x264_hpel_filter_c_neon( uint8_t *, int16_t *, int );
2753-void x264_hpel_filter_h_neon( uint8_t *, uint8_t *, int );
2754-
2755 static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
2756 int stride, int width, int height, int16_t *buf )
2757 {
2758@@ -198,18 +199,22 @@ static void hpel_filter_neon( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8
2759 src += stride;
2760 }
2761 }
2762+#endif // !X264_HIGH_BIT_DEPTH
2763
2764 void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
2765 {
2766 if( !(cpu&X264_CPU_ARMV6) )
2767 return;
2768
2769+#if !X264_HIGH_BIT_DEPTH
2770 pf->prefetch_fenc = x264_prefetch_fenc_arm;
2771 pf->prefetch_ref = x264_prefetch_ref_arm;
2772+#endif // !X264_HIGH_BIT_DEPTH
2773
2774 if( !(cpu&X264_CPU_NEON) )
2775 return;
2776
2777+#if !X264_HIGH_BIT_DEPTH
2778 pf->copy_16x16_unaligned = x264_mc_copy_w16_neon;
2779 pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_neon;
2780 pf->copy[PIXEL_8x8] = x264_mc_copy_w8_neon;
2781@@ -229,15 +234,16 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
2782 pf->offsetsub = x264_mc_offsetsub_wtab_neon;
2783 pf->weight_cache = x264_weight_cache_neon;
2784
2785-// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
2786-#ifndef SYS_MACOSX
2787- pf->memcpy_aligned = x264_memcpy_aligned_neon;
2788-#endif
2789- pf->memzero_aligned = x264_memzero_aligned_neon;
2790-
2791 pf->mc_chroma = x264_mc_chroma_neon;
2792 pf->mc_luma = mc_luma_neon;
2793 pf->get_ref = get_ref_neon;
2794 pf->hpel_filter = hpel_filter_neon;
2795 pf->frame_init_lowres_core = x264_frame_init_lowres_core_neon;
2796+#endif // !X264_HIGH_BIT_DEPTH
2797+
2798+// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
2799+#ifndef SYS_MACOSX
2800+ pf->memcpy_aligned = x264_memcpy_aligned_neon;
2801+#endif
2802+ pf->memzero_aligned = x264_memzero_aligned_neon;
2803 }
2804diff --git a/common/arm/predict-c.c b/common/arm/predict-c.c
2805index fa7b9f7..b40dc9a 100644
2806--- a/common/arm/predict-c.c
2807+++ b/common/arm/predict-c.c
2808@@ -51,6 +51,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
2809 if (!(cpu&X264_CPU_ARMV6))
2810 return;
2811
2812+#if !X264_HIGH_BIT_DEPTH
2813 pf[I_PRED_4x4_H] = x264_predict_4x4_h_armv6;
2814 pf[I_PRED_4x4_DC] = x264_predict_4x4_dc_armv6;
2815 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_armv6;
2816@@ -59,6 +60,7 @@ void x264_predict_4x4_init_arm( int cpu, x264_predict_t pf[12] )
2817 return;
2818
2819 pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_neon;
2820+#endif // !X264_HIGH_BIT_DEPTH
2821 }
2822
2823 void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
2824@@ -66,12 +68,14 @@ void x264_predict_8x8c_init_arm( int cpu, x264_predict_t pf[7] )
2825 if (!(cpu&X264_CPU_NEON))
2826 return;
2827
2828+#if !X264_HIGH_BIT_DEPTH
2829 pf[I_PRED_CHROMA_DC] = x264_predict_8x8c_dc_neon;
2830 pf[I_PRED_CHROMA_DC_TOP] = x264_predict_8x8c_dc_top_neon;
2831 pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left_neon;
2832 pf[I_PRED_CHROMA_H] = x264_predict_8x8c_h_neon;
2833 pf[I_PRED_CHROMA_V] = x264_predict_8x8c_v_neon;
2834 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_neon;
2835+#endif // !X264_HIGH_BIT_DEPTH
2836 }
2837
2838 void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_filter )
2839@@ -79,8 +83,10 @@ void x264_predict_8x8_init_arm( int cpu, x264_predict8x8_t pf[12], x264_predict_
2840 if (!(cpu&X264_CPU_NEON))
2841 return;
2842
2843+#if !X264_HIGH_BIT_DEPTH
2844 pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_neon;
2845 pf[I_PRED_8x8_H] = x264_predict_8x8_h_neon;
2846+#endif // !X264_HIGH_BIT_DEPTH
2847 }
2848
2849 void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
2850@@ -88,10 +94,12 @@ void x264_predict_16x16_init_arm( int cpu, x264_predict_t pf[7] )
2851 if (!(cpu&X264_CPU_NEON))
2852 return;
2853
2854+#if !X264_HIGH_BIT_DEPTH
2855 pf[I_PRED_16x16_DC ] = x264_predict_16x16_dc_neon;
2856 pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_neon;
2857 pf[I_PRED_16x16_DC_LEFT]= x264_predict_16x16_dc_left_neon;
2858 pf[I_PRED_16x16_H ] = x264_predict_16x16_h_neon;
2859 pf[I_PRED_16x16_V ] = x264_predict_16x16_v_neon;
2860 pf[I_PRED_16x16_P ] = x264_predict_16x16_p_neon;
2861+#endif // !X264_HIGH_BIT_DEPTH
2862 }
2863diff --git a/common/bitstream.h b/common/bitstream.h
2864index dd8118d..318c790 100644
2865--- a/common/bitstream.h
2866+++ b/common/bitstream.h
2867@@ -53,7 +53,7 @@ typedef struct bs_s
2868 typedef struct
2869 {
2870 int last;
2871- int16_t level[16];
2872+ dctcoef level[16];
2873 uint8_t run[16];
2874 } x264_run_level_t;
2875
2876diff --git a/common/common.c b/common/common.c
2877index 14dd716..728dfab 100644
2878--- a/common/common.c
2879+++ b/common/common.c
2880@@ -91,10 +91,10 @@ void x264_param_default( x264_param_t *param )
2881 param->rc.i_vbv_max_bitrate = 0;
2882 param->rc.i_vbv_buffer_size = 0;
2883 param->rc.f_vbv_buffer_init = 0.9;
2884- param->rc.i_qp_constant = 23;
2885- param->rc.f_rf_constant = 23;
2886+ param->rc.i_qp_constant = 23 + QP_BD_OFFSET;
2887+ param->rc.f_rf_constant = 23 + QP_BD_OFFSET;
2888 param->rc.i_qp_min = 10;
2889- param->rc.i_qp_max = 51;
2890+ param->rc.i_qp_max = QP_MAX;
2891 param->rc.i_qp_step = 4;
2892 param->rc.f_ip_factor = 1.4;
2893 param->rc.f_pb_factor = 1.3;
2894@@ -418,6 +418,15 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
2895 if( !profile )
2896 return 0;
2897
2898+#if BIT_DEPTH > 8
2899+ if( !strcasecmp( profile, "baseline" ) || !strcasecmp( profile, "main" ) ||
2900+ !strcasecmp( profile, "high" ) )
2901+ {
2902+ x264_log( NULL, X264_LOG_ERROR, "%s profile doesn't support a bit depth of %d.\n", profile, BIT_DEPTH );
2903+ return -1;
2904+ }
2905+#endif
2906+
2907 if( !strcasecmp( profile, "baseline" ) )
2908 {
2909 param->analyse.b_transform_8x8 = 0;
2910@@ -441,7 +450,7 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
2911 param->analyse.b_transform_8x8 = 0;
2912 param->i_cqm_preset = X264_CQM_FLAT;
2913 }
2914- else if( !strcasecmp( profile, "high" ) )
2915+ else if( !strcasecmp( profile, "high" ) || !strcasecmp( profile, "high10" ) )
2916 {
2917 /* Default */
2918 }
2919diff --git a/common/common.h b/common/common.h
2920index 7b60811..a218d35 100644
2921--- a/common/common.h
2922+++ b/common/common.h
2923@@ -54,8 +54,13 @@ do {\
2924 #define X264_THREAD_MAX 128
2925 #define X264_PCM_COST (386*8)
2926 #define X264_LOOKAHEAD_MAX 250
2927+#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
2928+#define QP_MAX (51+QP_BD_OFFSET)
2929+#define QP_MAX_MAX (51+2*6)
2930+#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
2931+#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
2932 // arbitrary, but low because SATD scores are 1/4 normal
2933-#define X264_LOOKAHEAD_QP 12
2934+#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
2935
2936 // number of pixels (per thread) in progress at any given time.
2937 // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
2938@@ -101,17 +106,23 @@ typedef union { x264_uint128_t i; uint64_t a[2]; uint32_t b[4]; uint16_t c[8]; u
2939 #define CP64(dst,src) M64(dst) = M64(src)
2940 #define CP128(dst,src) M128(dst) = M128(src)
2941
2942-typedef uint8_t pixel;
2943-typedef uint32_t pixel4;
2944-typedef int16_t dctcoef;
2945+#if X264_HIGH_BIT_DEPTH
2946+ typedef uint16_t pixel;
2947+ typedef uint64_t pixel4;
2948+ typedef int32_t dctcoef;
2949
2950-#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
2951-#define MPIXEL_X4(src) M32(src)
2952-#define CPPIXEL_X4(dst,src) CP32(dst,src)
2953-#define CPPIXEL_X8(dst,src) CP64(dst,src)
2954-#define MDCT_X2(dct) M32(dct)
2955-#define CPDCT_X2(dst,src) CP32(dst,src)
2956-#define CPDCT_X4(dst,src) CP64(dst,src)
2957+# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
2958+# define MPIXEL_X4(src) M64(src)
2959+#else
2960+ typedef uint8_t pixel;
2961+ typedef uint32_t pixel4;
2962+ typedef int16_t dctcoef;
2963+
2964+# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
2965+# define MPIXEL_X4(src) M32(src)
2966+#endif
2967+
2968+#define CPPIXEL_X4(dst,src) MPIXEL_X4(dst) = MPIXEL_X4(src)
2969
2970 #define X264_SCAN8_SIZE (6*8)
2971 #define X264_SCAN8_LUMA_SIZE (5*8)
2972@@ -189,7 +200,7 @@ void x264_init_vlc_tables();
2973
2974 static ALWAYS_INLINE pixel x264_clip_pixel( int x )
2975 {
2976- return x&(~255) ? (-x)>>31 : x;
2977+ return ( (x & ~PIXEL_MAX) ? (-x)>>31 & PIXEL_MAX : x );
2978 }
2979
2980 static ALWAYS_INLINE int x264_clip3( int v, int i_min, int i_max )
2981@@ -449,8 +460,8 @@ struct x264_t
2982 /* mv/ref cost arrays. Indexed by lambda instead of
2983 * qp because, due to rounding, some quantizers share
2984 * lambdas. This saves memory. */
2985- uint16_t *cost_mv[92];
2986- uint16_t *cost_mv_fpel[92][4];
2987+ uint16_t *cost_mv[LAMBDA_MAX+1];
2988+ uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
2989
2990 const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
2991
2992diff --git a/common/dct.c b/common/dct.c
2993index 60dbd55..cd27363 100644
2994--- a/common/dct.c
2995+++ b/common/dct.c
2996@@ -418,6 +418,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
2997 dctf->dct4x4dc = dct4x4dc;
2998 dctf->idct4x4dc = idct4x4dc;
2999
3000+#if !X264_HIGH_BIT_DEPTH
3001 #if HAVE_MMX
3002 if( cpu&X264_CPU_MMX )
3003 {
3004@@ -515,6 +516,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
3005 dctf->add16x16_idct8= x264_add16x16_idct8_neon;
3006 }
3007 #endif
3008+#endif // !X264_HIGH_BIT_DEPTH
3009 }
3010
3011 void x264_dct_init_weights( void )
3012@@ -599,11 +601,9 @@ static void zigzag_scan_4x4_frame( dctcoef level[16], dctcoef dct[16] )
3013
3014 static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
3015 {
3016- CPDCT_X2( level, dct );
3017+ memcpy( level, dct, 2 * sizeof(dctcoef) );
3018 ZIG(2,0,1) ZIG(3,2,0) ZIG(4,3,0) ZIG(5,1,1)
3019- CPDCT_X2( level+6, dct+6 );
3020- CPDCT_X4( level+8, dct+8 );
3021- CPDCT_X4( level+12, dct+12 );
3022+ memcpy( level+6, dct+6, 10 * sizeof(dctcoef) );
3023 }
3024
3025 #undef ZIG
3026@@ -618,6 +618,7 @@ static void zigzag_scan_4x4_field( dctcoef level[16], dctcoef dct[16] )
3027 CPPIXEL_X4( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
3028 CPPIXEL_X4( p_dst+2*FDEC_STRIDE, p_src+2*FENC_STRIDE );\
3029 CPPIXEL_X4( p_dst+3*FDEC_STRIDE, p_src+3*FENC_STRIDE );
3030+#define CPPIXEL_X8(dst,src) ( CPPIXEL_X4(dst,src), CPPIXEL_X4(dst+4,src+4) )
3031 #define COPY8x8\
3032 CPPIXEL_X8( p_dst+0*FDEC_STRIDE, p_src+0*FENC_STRIDE );\
3033 CPPIXEL_X8( p_dst+1*FDEC_STRIDE, p_src+1*FENC_STRIDE );\
3034@@ -709,6 +710,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3035 pf->sub_8x8 = zigzag_sub_8x8_field;
3036 pf->sub_4x4 = zigzag_sub_4x4_field;
3037 pf->sub_4x4ac = zigzag_sub_4x4ac_field;
3038+#if !X264_HIGH_BIT_DEPTH
3039 #if HAVE_MMX
3040 if( cpu&X264_CPU_MMXEXT )
3041 {
3042@@ -726,6 +728,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3043 if( cpu&X264_CPU_ALTIVEC )
3044 pf->scan_4x4 = x264_zigzag_scan_4x4_field_altivec;
3045 #endif
3046+#endif // !X264_HIGH_BIT_DEPTH
3047 }
3048 else
3049 {
3050@@ -734,6 +737,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3051 pf->sub_8x8 = zigzag_sub_8x8_frame;
3052 pf->sub_4x4 = zigzag_sub_4x4_frame;
3053 pf->sub_4x4ac = zigzag_sub_4x4ac_frame;
3054+#if !X264_HIGH_BIT_DEPTH
3055 #if HAVE_MMX
3056 if( cpu&X264_CPU_MMX )
3057 pf->scan_4x4 = x264_zigzag_scan_4x4_frame_mmx;
3058@@ -759,13 +763,16 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
3059 if( cpu&X264_CPU_NEON )
3060 pf->scan_4x4 = x264_zigzag_scan_4x4_frame_neon;
3061 #endif
3062+#endif // !X264_HIGH_BIT_DEPTH
3063 }
3064
3065 pf->interleave_8x8_cavlc = zigzag_interleave_8x8_cavlc;
3066+#if !X264_HIGH_BIT_DEPTH
3067 #if HAVE_MMX
3068 if( cpu&X264_CPU_MMX )
3069 pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_mmx;
3070 if( cpu&X264_CPU_SHUFFLE_IS_FAST )
3071 pf->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_sse2;
3072 #endif
3073+#endif // !X264_HIGH_BIT_DEPTH
3074 }
3075diff --git a/common/deblock.c b/common/deblock.c
3076index db9c95d..0b3b6df 100644
3077--- a/common/deblock.c
3078+++ b/common/deblock.c
3079@@ -265,18 +265,19 @@ static void deblock_strength_c( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264
3080
3081 static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_inter_t pf_inter )
3082 {
3083- int index_a = i_qp + h->sh.i_alpha_c0_offset;
3084- int alpha = alpha_table(index_a);
3085- int beta = beta_table(i_qp + h->sh.i_beta_offset);
3086+ int index_a = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset), 0, 51);
3087+ int index_b = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_beta_offset), 0, 51);
3088+ int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
3089+ int beta = beta_table(index_b) << (BIT_DEPTH-8);
3090 int8_t tc[4];
3091
3092 if( !M32(bS) || !alpha || !beta )
3093 return;
3094
3095- tc[0] = tc0_table(index_a)[bS[0]] + b_chroma;
3096- tc[1] = tc0_table(index_a)[bS[1]] + b_chroma;
3097- tc[2] = tc0_table(index_a)[bS[2]] + b_chroma;
3098- tc[3] = tc0_table(index_a)[bS[3]] + b_chroma;
3099+ tc[0] = (tc0_table(index_a)[bS[0]] << (BIT_DEPTH-8)) + b_chroma;
3100+ tc[1] = (tc0_table(index_a)[bS[1]] << (BIT_DEPTH-8)) + b_chroma;
3101+ tc[2] = (tc0_table(index_a)[bS[2]] << (BIT_DEPTH-8)) + b_chroma;
3102+ tc[3] = (tc0_table(index_a)[bS[3]] << (BIT_DEPTH-8)) + b_chroma;
3103
3104 pf_inter( pix1, i_stride, alpha, beta, tc );
3105 if( b_chroma )
3106@@ -285,8 +286,10 @@ static inline void deblock_edge( x264_t *h, pixel *pix1, pixel *pix2, int i_stri
3107
3108 static inline void deblock_edge_intra( x264_t *h, pixel *pix1, pixel *pix2, int i_stride, uint8_t bS[4], int i_qp, int b_chroma, x264_deblock_intra_t pf_intra )
3109 {
3110- int alpha = alpha_table(i_qp + h->sh.i_alpha_c0_offset);
3111- int beta = beta_table(i_qp + h->sh.i_beta_offset);
3112+ int index_a = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_alpha_c0_offset), 0, 51);
3113+ int index_b = x264_clip3((i_qp-QP_BD_OFFSET + h->sh.i_beta_offset), 0, 51);
3114+ int alpha = alpha_table(index_a) << (BIT_DEPTH-8);
3115+ int beta = beta_table(index_b) << (BIT_DEPTH-8);
3116
3117 if( !alpha || !beta )
3118 return;
3119@@ -450,6 +453,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3120 #if HAVE_MMX
3121 if( cpu&X264_CPU_MMXEXT )
3122 {
3123+#if !X264_HIGH_BIT_DEPTH
3124 pf->deblock_chroma[1] = x264_deblock_v_chroma_mmxext;
3125 pf->deblock_chroma[0] = x264_deblock_h_chroma_mmxext;
3126 pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_mmxext;
3127@@ -460,10 +464,12 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3128 pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmxext;
3129 pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmxext;
3130 #endif
3131+#endif // !X264_HIGH_BIT_DEPTH
3132 pf->deblock_strength = x264_deblock_strength_mmxext;
3133 if( cpu&X264_CPU_SSE2 )
3134 {
3135 pf->deblock_strength = x264_deblock_strength_sse2;
3136+#if !X264_HIGH_BIT_DEPTH
3137 if( !(cpu&X264_CPU_STACK_MOD4) )
3138 {
3139 pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
3140@@ -471,12 +477,14 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3141 pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_sse2;
3142 pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_sse2;
3143 }
3144+#endif // !X264_HIGH_BIT_DEPTH
3145 }
3146 if( cpu&X264_CPU_SSSE3 )
3147 pf->deblock_strength = x264_deblock_strength_ssse3;
3148 }
3149 #endif
3150
3151+#if !X264_HIGH_BIT_DEPTH
3152 #if HAVE_ALTIVEC
3153 if( cpu&X264_CPU_ALTIVEC )
3154 {
3155@@ -494,4 +502,5 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
3156 pf->deblock_chroma[0] = x264_deblock_h_chroma_neon;
3157 }
3158 #endif
3159+#endif // !X264_HIGH_BIT_DEPTH
3160 }
3161diff --git a/common/macroblock.c b/common/macroblock.c
3162index 4561d8a..f0a624f 100644
3163--- a/common/macroblock.c
3164+++ b/common/macroblock.c
3165@@ -337,7 +337,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
3166 int scratch_size = 0;
3167 if( !b_lookahead )
3168 {
3169- int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(int16_t);
3170+ int buf_hpel = (h->thread[0]->fdec->i_width[0]+48) * sizeof(dctcoef);
3171 int buf_ssim = h->param.analyse.b_ssim * 8 * (h->param.i_width/4+3) * sizeof(int);
3172 int me_range = X264_MIN(h->param.analyse.i_me_range, h->param.analyse.i_mv_range);
3173 int buf_tesa = (h->param.analyse.i_me_method >= X264_ME_ESA) *
3174diff --git a/common/macroblock.h b/common/macroblock.h
3175index 1a4992f..e09cd55 100644
3176--- a/common/macroblock.h
3177+++ b/common/macroblock.h
3178@@ -238,17 +238,30 @@ static const uint16_t block_idx_xy_fdec[16] =
3179 2*4 + 3*4*FDEC_STRIDE, 3*4 + 3*4*FDEC_STRIDE
3180 };
3181
3182-static const uint8_t i_chroma_qp_table[52+12*2] =
3183+#define QP(qP) ( (qP)+QP_BD_OFFSET )
3184+static const uint8_t i_chroma_qp_table[QP_MAX+1+12*2] =
3185 {
3186- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
3187- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
3188- 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3189- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
3190- 29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
3191- 36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
3192- 39, 39,
3193- 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
3194+ 0, 0, 0, 0, 0, 0,
3195+ 0, 0, 0, 0, 0, 0,
3196+#if BIT_DEPTH > 9
3197+ QP(-12),QP(-11),QP(-10), QP(-9), QP(-8), QP(-7),
3198+#endif
3199+#if BIT_DEPTH > 8
3200+ QP(-6), QP(-5), QP(-4), QP(-3), QP(-2), QP(-1),
3201+#endif
3202+ QP(0), QP(1), QP(2), QP(3), QP(4), QP(5),
3203+ QP(6), QP(7), QP(8), QP(9), QP(10), QP(11),
3204+ QP(12), QP(13), QP(14), QP(15), QP(16), QP(17),
3205+ QP(18), QP(19), QP(20), QP(21), QP(22), QP(23),
3206+ QP(24), QP(25), QP(26), QP(27), QP(28), QP(29),
3207+ QP(29), QP(30), QP(31), QP(32), QP(32), QP(33),
3208+ QP(34), QP(34), QP(35), QP(35), QP(36), QP(36),
3209+ QP(37), QP(37), QP(37), QP(38), QP(38), QP(38),
3210+ QP(39), QP(39), QP(39), QP(39),
3211+ QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
3212+ QP(39), QP(39), QP(39), QP(39), QP(39), QP(39),
3213 };
3214+#undef QP
3215
3216 enum cabac_ctx_block_cat_e
3217 {
3218@@ -340,26 +353,31 @@ static ALWAYS_INLINE uint32_t pack16to32_mask( int a, int b )
3219 return (a&0xFFFF) + (b<<16);
3220 #endif
3221 }
3222+static ALWAYS_INLINE uint64_t pack32to64( uint32_t a, uint32_t b )
3223+{
3224+#ifdef WORDS_BIGENDIAN
3225+ return b + ((uint64_t)a<<32);
3226+#else
3227+ return a + ((uint64_t)b<<32);
3228+#endif
3229+}
3230
3231-#define pack_pixel_1to2 pack8to16
3232-#define pack_pixel_2to4 pack16to32
3233+#if X264_HIGH_BIT_DEPTH
3234+# define pack_pixel_1to2 pack16to32
3235+# define pack_pixel_2to4 pack32to64
3236+#else
3237+# define pack_pixel_1to2 pack8to16
3238+# define pack_pixel_2to4 pack16to32
3239+#endif
3240
3241-#define array_non_zero(a) array_non_zero_int(a, sizeof(a))
3242+#define array_non_zero(a) array_non_zero_int(a, sizeof(a)/sizeof(dctcoef))
3243 #define array_non_zero_int array_non_zero_int
3244 static ALWAYS_INLINE int array_non_zero_int( dctcoef *v, int i_count )
3245 {
3246- if(i_count == 8)
3247- return !!M64( &v[0] );
3248- else if(i_count == 16)
3249- return !!(M64( &v[0] ) | M64( &v[4] ));
3250- else if(i_count == 32)
3251- return !!(M64( &v[0] ) | M64( &v[4] ) | M64( &v[8] ) | M64( &v[12] ));
3252- else
3253- {
3254- for( int i = 0; i < i_count; i+=4 )
3255- if( M64( &v[i] ) ) return 1;
3256- return 0;
3257- }
3258+ for( int i = 0; i < i_count; i++ )
3259+ if( v[i] )
3260+ return 1;
3261+ return 0;
3262 }
3263 static ALWAYS_INLINE int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
3264 {
3265diff --git a/common/mc.c b/common/mc.c
3266index 9776bec..5ef0682 100644
3267--- a/common/mc.c
3268+++ b/common/mc.c
3269@@ -117,11 +117,14 @@ static void x264_weight_cache( x264_t *h, x264_weight_t *w )
3270 {
3271 w->weightfn = h->mc.weight;
3272 }
3273-#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * weight->i_scale + (1<<(weight->i_denom - 1))) >> weight->i_denom) + weight->i_offset )
3274-#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * weight->i_scale + weight->i_offset )
3275-static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
3276+#define opscale(x) dst[x] = x264_clip_pixel( ((src[x] * scale + (1<<(denom - 1))) >> denom) + offset )
3277+#define opscale_noden(x) dst[x] = x264_clip_pixel( src[x] * scale + offset )
3278+static void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int i_width, int i_height )
3279 {
3280- if( weight->i_denom >= 1 )
3281+ int offset = weight->i_offset * (1 << (BIT_DEPTH-8)); /* multiply, not <<: i_offset may be negative and left-shifting a negative int is undefined */
3282+ int scale = weight->i_scale;
3283+ int denom = weight->i_denom;
3284+ if( denom >= 1 )
3285 {
3286 for( int y = 0; y < i_height; y++, dst += i_dst_stride, src += i_src_stride )
3287 for( int x = 0; x < i_width; x++ )
3288@@ -135,21 +138,10 @@ static inline void mc_weight( pixel *dst, int i_dst_stride, pixel *src, int i_sr
3289 }
3290 }
3291
3292-#define MC_WEIGHT_C( name, lx ) \
3293+#define MC_WEIGHT_C( name, width ) \
3294 static void name( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, const x264_weight_t *weight, int height ) \
3295 { \
3296- if( weight->i_denom >= 1 ) \
3297- { \
3298- for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
3299- for( int x = 0; x < lx; x++ ) \
3300- opscale( x ); \
3301- } \
3302- else \
3303- { \
3304- for( int y = 0; y < height; y++, dst += i_dst_stride, src += i_src_stride ) \
3305- for( int x = 0; x < lx; x++ ) \
3306- opscale_noden( x ); \
3307- } \
3308+ mc_weight( dst, i_dst_stride, src, i_src_stride, weight, width, height );\
3309 }
3310
3311 MC_WEIGHT_C( mc_weight_w20, 20 )
3312@@ -182,7 +174,7 @@ static void mc_copy( pixel *src, int i_src_stride, pixel *dst, int i_dst_stride,
3313
3314 #define TAPFILTER(pix, d) ((pix)[x-2*d] + (pix)[x+3*d] - 5*((pix)[x-d] + (pix)[x+2*d]) + 20*((pix)[x] + (pix)[x+d]))
3315 static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
3316- int stride, int width, int height, int16_t *buf )
3317+ int stride, int width, int height, dctcoef *buf )
3318 {
3319 for( int y = 0; y < height; y++ )
3320 {
3321@@ -301,7 +293,12 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
3322 {
3323 while( h-- )
3324 {
3325+#if X264_HIGH_BIT_DEPTH
3326+ for( int i = 0; i < w; i++ )
3327+ dst[i] = src[i] << (BIT_DEPTH-8);
3328+#else
3329 memcpy( dst, src, w );
3330+#endif
3331 dst += i_dst;
3332 src += i_src;
3333 }
3334diff --git a/common/mc.h b/common/mc.h
3335index bb16d13..cbdf1a6 100644
3336--- a/common/mc.h
3337+++ b/common/mc.h
3338@@ -82,7 +82,7 @@ typedef struct
3339 uint8_t *src, int i_src, int w, int h);
3340
3341 void (*hpel_filter)( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
3342- int i_stride, int i_width, int i_height, int16_t *buf );
3343+ int i_stride, int i_width, int i_height, dctcoef *buf );
3344
3345 /* prefetch the next few macroblocks of fenc or fdec */
3346 void (*prefetch_fenc)( pixel *pix_y, int stride_y,
3347diff --git a/common/pixel.c b/common/pixel.c
3348index 8441c7a..069589f 100644
3349--- a/common/pixel.c
3350+++ b/common/pixel.c
3351@@ -177,7 +177,7 @@ static int pixel_var2_8x8( pixel *pix1, int i_stride1, pixel *pix2, int i_stride
3352 pix2 += i_stride2;
3353 }
3354 sum = abs(sum);
3355- var = sqr - (sum * sum >> 6);
3356+ var = sqr - ((uint64_t)sum * sum >> 6);
3357 *ssd = sqr;
3358 return var;
3359 }
3360@@ -406,12 +406,14 @@ SAD_X( 8x4 )
3361 SAD_X( 4x8 )
3362 SAD_X( 4x4 )
3363
3364+#if !X264_HIGH_BIT_DEPTH
3365 #if ARCH_UltraSparc
3366 SAD_X( 16x16_vis )
3367 SAD_X( 16x8_vis )
3368 SAD_X( 8x16_vis )
3369 SAD_X( 8x8_vis )
3370 #endif
3371+#endif // !X264_HIGH_BIT_DEPTH
3372
3373 /****************************************************************************
3374 * pixel_satd_x4
3375@@ -444,6 +446,7 @@ SATD_X_DECL6( cpu )\
3376 SATD_X( 4x4, cpu )
3377
3378 SATD_X_DECL7()
3379+#if !X264_HIGH_BIT_DEPTH
3380 #if HAVE_MMX
3381 SATD_X_DECL7( _mmxext )
3382 SATD_X_DECL6( _sse2 )
3383@@ -454,6 +457,7 @@ SATD_X_DECL7( _sse4 )
3384 #if HAVE_ARMV6
3385 SATD_X_DECL7( _neon )
3386 #endif
3387+#endif // !X264_HIGH_BIT_DEPTH
3388
3389 #define INTRA_MBCMP_8x8( mbcmp )\
3390 void x264_intra_##mbcmp##_x3_8x8( pixel *fenc, pixel edge[33], int res[3] )\
3391@@ -520,8 +524,8 @@ static void ssim_4x4x2_core( const pixel *pix1, int stride1,
3392
3393 static float ssim_end1( int s1, int s2, int ss, int s12 )
3394 {
3395- static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
3396- static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
3397+ static const int ssim_c1 = (int)(.01*.01*PIXEL_MAX*PIXEL_MAX*64 + .5);
3398+ static const int ssim_c2 = (int)(.03*.03*PIXEL_MAX*PIXEL_MAX*64*63 + .5);
3399 int vars = ss*64 - s1*s1 - s2*s2;
3400 int covar = s12*64 - s1*s2;
3401 return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
3402@@ -678,6 +682,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
3403 pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16;
3404 pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16;
3405
3406+#if !X264_HIGH_BIT_DEPTH
3407 #if HAVE_MMX
3408 if( cpu&X264_CPU_MMX )
3409 {
3410@@ -903,17 +908,20 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
3411 }
3412 }
3413 #endif
3414+#endif // !X264_HIGH_BIT_DEPTH
3415 #if HAVE_ALTIVEC
3416 if( cpu&X264_CPU_ALTIVEC )
3417 {
3418 x264_pixel_altivec_init( pixf );
3419 }
3420 #endif
3421+#if !X264_HIGH_BIT_DEPTH
3422 #if ARCH_UltraSparc
3423 INIT4( sad, _vis );
3424 INIT4( sad_x3, _vis );
3425 INIT4( sad_x4, _vis );
3426 #endif
3427+#endif // !X264_HIGH_BIT_DEPTH
3428
3429 pixf->ads[PIXEL_8x16] =
3430 pixf->ads[PIXEL_8x4] =
3431diff --git a/common/ppc/dct.c b/common/ppc/dct.c
3432index eb223ae..85d5ce7 100644
3433--- a/common/ppc/dct.c
3434+++ b/common/ppc/dct.c
3435@@ -24,6 +24,7 @@
3436 #include "common/common.h"
3437 #include "ppccommon.h"
3438
3439+#if !X264_HIGH_BIT_DEPTH
3440 #define VEC_DCT(a0,a1,a2,a3,b0,b1,b2,b3) \
3441 b1 = vec_add( a0, a3 ); \
3442 b3 = vec_add( a1, a2 ); \
3443@@ -482,4 +483,5 @@ void x264_zigzag_scan_4x4_field_altivec( int16_t level[16], int16_t dct[4][4] )
3444 vec_st( tmp0v, 0x00, level );
3445 vec_st( tmp1v, 0x10, level );
3446 }
3447+#endif // !X264_HIGH_BIT_DEPTH
3448
3449diff --git a/common/ppc/deblock.c b/common/ppc/deblock.c
3450index 0c8d2d4..986710d 100644
3451--- a/common/ppc/deblock.c
3452+++ b/common/ppc/deblock.c
3453@@ -21,6 +21,7 @@
3454 #include "common/common.h"
3455 #include "ppccommon.h"
3456
3457+#if !X264_HIGH_BIT_DEPTH
3458 #define transpose4x16(r0, r1, r2, r3) \
3459 { \
3460 register vec_u8_t r4; \
3461@@ -292,3 +293,4 @@ void x264_deblock_h_luma_altivec( uint8_t *pix, int stride, int alpha, int beta,
3462 transpose4x16(line1, line2, line3, line4);
3463 write16x4(pix-2, stride, line1, line2, line3, line4);
3464 }
3465+#endif // !X264_HIGH_BIT_DEPTH
3466diff --git a/common/ppc/mc.c b/common/ppc/mc.c
3467index 7ad8050..744a804 100644
3468--- a/common/ppc/mc.c
3469+++ b/common/ppc/mc.c
3470@@ -33,6 +33,7 @@
3471 #include "mc.h"
3472 #include "ppccommon.h"
3473
3474+#if !X264_HIGH_BIT_DEPTH
3475 typedef void (*pf_mc_t)( uint8_t *src, int i_src,
3476 uint8_t *dst, int i_dst, int i_height );
3477
3478@@ -792,9 +793,11 @@ static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_
3479 dstc += dst_stride;
3480 }
3481 }
3482+#endif // !X264_HIGH_BIT_DEPTH
3483
3484 void x264_mc_altivec_init( x264_mc_functions_t *pf )
3485 {
3486+#if !X264_HIGH_BIT_DEPTH
3487 pf->mc_luma = mc_luma_altivec;
3488 pf->get_ref = get_ref_altivec;
3489 pf->mc_chroma = mc_chroma_altivec;
3490@@ -804,4 +807,5 @@ void x264_mc_altivec_init( x264_mc_functions_t *pf )
3491
3492 pf->hpel_filter = x264_hpel_filter_altivec;
3493 pf->frame_init_lowres_core = frame_init_lowres_core_altivec;
3494+#endif // !X264_HIGH_BIT_DEPTH
3495 }
3496diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c
3497index 3f99606..bd5f547 100644
3498--- a/common/ppc/pixel.c
3499+++ b/common/ppc/pixel.c
3500@@ -24,6 +24,7 @@
3501 #include "common/common.h"
3502 #include "ppccommon.h"
3503
3504+#if !X264_HIGH_BIT_DEPTH
3505 /***********************************************************************
3506 * SAD routines
3507 **********************************************************************/
3508@@ -1979,12 +1980,14 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
3509 sums[0][3] = temp[0];
3510 sums[1][3] = temp[1];
3511 }
3512+#endif // !X264_HIGH_BIT_DEPTH
3513
3514 /****************************************************************************
3515 * x264_pixel_init:
3516 ****************************************************************************/
3517 void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
3518 {
3519+#if !X264_HIGH_BIT_DEPTH
3520 pixf->sad[PIXEL_16x16] = pixel_sad_16x16_altivec;
3521 pixf->sad[PIXEL_8x16] = pixel_sad_8x16_altivec;
3522 pixf->sad[PIXEL_16x8] = pixel_sad_16x8_altivec;
3523@@ -2023,4 +2026,5 @@ void x264_pixel_altivec_init( x264_pixel_function_t *pixf )
3524 pixf->hadamard_ac[PIXEL_8x8] = x264_pixel_hadamard_ac_8x8_altivec;
3525
3526 pixf->ssim_4x4x2_core = ssim_4x4x2_core_altivec;
3527+#endif // !X264_HIGH_BIT_DEPTH
3528 }
3529diff --git a/common/ppc/predict.c b/common/ppc/predict.c
3530index 3fb1a2b..c71dbb5 100644
3531--- a/common/ppc/predict.c
3532+++ b/common/ppc/predict.c
3533@@ -23,6 +23,7 @@
3534 #include "pixel.h"
3535 #include "ppccommon.h"
3536
3537+#if !X264_HIGH_BIT_DEPTH
3538 static void predict_8x8c_p_altivec( uint8_t *src )
3539 {
3540 int H = 0, V = 0;
3541@@ -194,6 +195,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
3542 src += FDEC_STRIDE;
3543 }
3544 }
3545+#endif // !X264_HIGH_BIT_DEPTH
3546
3547
3548 /****************************************************************************
3549@@ -201,6 +203,7 @@ static void predict_16x16_v_altivec( uint8_t *src )
3550 ****************************************************************************/
3551 void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
3552 {
3553+#if !X264_HIGH_BIT_DEPTH
3554 pf[I_PRED_16x16_V ] = predict_16x16_v_altivec;
3555 pf[I_PRED_16x16_H ] = predict_16x16_h_altivec;
3556 pf[I_PRED_16x16_DC] = predict_16x16_dc_altivec;
3557@@ -208,9 +211,12 @@ void x264_predict_16x16_init_altivec( x264_predict_t pf[7] )
3558 pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left_altivec;
3559 pf[I_PRED_16x16_DC_TOP ] = predict_16x16_dc_top_altivec;
3560 pf[I_PRED_16x16_DC_128 ] = predict_16x16_dc_128_altivec;
3561+#endif // !X264_HIGH_BIT_DEPTH
3562 }
3563
3564 void x264_predict_8x8c_init_altivec( x264_predict_t pf[7] )
3565 {
3566+#if !X264_HIGH_BIT_DEPTH
3567 pf[I_PRED_CHROMA_P] = predict_8x8c_p_altivec;
3568+#endif // !X264_HIGH_BIT_DEPTH
3569 }
3570diff --git a/common/ppc/quant.c b/common/ppc/quant.c
3571index 6f41a06..ffd6a1b 100644
3572--- a/common/ppc/quant.c
3573+++ b/common/ppc/quant.c
3574@@ -22,6 +22,7 @@
3575 #include "ppccommon.h"
3576 #include "quant.h"
3577
3578+#if !X264_HIGH_BIT_DEPTH
3579 // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
3580 #define QUANT_16_U( idx0, idx1 ) \
3581 { \
3582@@ -360,4 +361,5 @@ void x264_dequant_8x8_altivec( int16_t dct[8][8], int dequant_mf[6][8][8], int i
3583 DEQUANT_SHR();
3584 }
3585 }
3586+#endif // !X264_HIGH_BIT_DEPTH
3587
3588diff --git a/common/predict.c b/common/predict.c
3589index 79ec1fc..dc92083 100644
3590--- a/common/predict.c
3591+++ b/common/predict.c
3592@@ -53,40 +53,40 @@
3593
3594 void x264_predict_16x16_dc_c( pixel *src )
3595 {
3596- pixel4 dc = 0;
3597+ int dc = 0;
3598
3599 for( int i = 0; i < 16; i++ )
3600 {
3601 dc += src[-1 + i * FDEC_STRIDE];
3602 dc += src[i - FDEC_STRIDE];
3603 }
3604- dc = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
3605+ pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 16 ) >> 5 );
3606
3607- PREDICT_16x16_DC( dc );
3608+ PREDICT_16x16_DC( dcsplat );
3609 }
3610 static void x264_predict_16x16_dc_left_c( pixel *src )
3611 {
3612- pixel4 dc = 0;
3613+ int dc = 0;
3614
3615 for( int i = 0; i < 16; i++ )
3616 dc += src[-1 + i * FDEC_STRIDE];
3617- dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3618+ pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3619
3620- PREDICT_16x16_DC( dc );
3621+ PREDICT_16x16_DC( dcsplat );
3622 }
3623 static void x264_predict_16x16_dc_top_c( pixel *src )
3624 {
3625- pixel4 dc = 0;
3626+ int dc = 0;
3627
3628 for( int i = 0; i < 16; i++ )
3629 dc += src[i - FDEC_STRIDE];
3630- dc = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3631+ pixel4 dcsplat = PIXEL_SPLAT_X4( ( dc + 8 ) >> 4 );
3632
3633- PREDICT_16x16_DC( dc );
3634+ PREDICT_16x16_DC( dcsplat );
3635 }
3636 static void x264_predict_16x16_dc_128_c( pixel *src )
3637 {
3638- PREDICT_16x16_DC( PIXEL_SPLAT_X4( 0x80 ) );
3639+ PREDICT_16x16_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
3640 }
3641 void x264_predict_16x16_h_c( pixel *src )
3642 {
3643@@ -155,53 +155,53 @@ static void x264_predict_8x8c_dc_128_c( pixel *src )
3644 {
3645 for( int y = 0; y < 8; y++ )
3646 {
3647- MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 0x80 );
3648- MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 0x80 );
3649+ MPIXEL_X4( src+0 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
3650+ MPIXEL_X4( src+4 ) = PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) );
3651 src += FDEC_STRIDE;
3652 }
3653 }
3654 static void x264_predict_8x8c_dc_left_c( pixel *src )
3655 {
3656- pixel4 dc0 = 0, dc1 = 0;
3657+ int dc0 = 0, dc1 = 0;
3658
3659 for( int y = 0; y < 4; y++ )
3660 {
3661 dc0 += src[y * FDEC_STRIDE - 1];
3662 dc1 += src[(y+4) * FDEC_STRIDE - 1];
3663 }
3664- dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3665- dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3666+ pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3667+ pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3668
3669 for( int y = 0; y < 4; y++ )
3670 {
3671- MPIXEL_X4( src+0 ) = dc0;
3672- MPIXEL_X4( src+4 ) = dc0;
3673+ MPIXEL_X4( src+0 ) = dc0splat;
3674+ MPIXEL_X4( src+4 ) = dc0splat;
3675 src += FDEC_STRIDE;
3676 }
3677 for( int y = 0; y < 4; y++ )
3678 {
3679- MPIXEL_X4( src+0 ) = dc1;
3680- MPIXEL_X4( src+4 ) = dc1;
3681+ MPIXEL_X4( src+0 ) = dc1splat;
3682+ MPIXEL_X4( src+4 ) = dc1splat;
3683 src += FDEC_STRIDE;
3684 }
3685
3686 }
3687 static void x264_predict_8x8c_dc_top_c( pixel *src )
3688 {
3689- pixel4 dc0 = 0, dc1 = 0;
3690+ int dc0 = 0, dc1 = 0;
3691
3692 for( int x = 0; x < 4; x++ )
3693 {
3694 dc0 += src[x - FDEC_STRIDE];
3695 dc1 += src[x + 4 - FDEC_STRIDE];
3696 }
3697- dc0 = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3698- dc1 = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3699+ pixel4 dc0splat = PIXEL_SPLAT_X4( ( dc0 + 2 ) >> 2 );
3700+ pixel4 dc1splat = PIXEL_SPLAT_X4( ( dc1 + 2 ) >> 2 );
3701
3702 for( int y = 0; y < 8; y++ )
3703 {
3704- MPIXEL_X4( src+0 ) = dc0;
3705- MPIXEL_X4( src+4 ) = dc1;
3706+ MPIXEL_X4( src+0 ) = dc0splat;
3707+ MPIXEL_X4( src+4 ) = dc1splat;
3708 src += FDEC_STRIDE;
3709 }
3710 }
3711@@ -306,7 +306,7 @@ static void x264_predict_8x8c_p_c( pixel *src )
3712
3713 static void x264_predict_4x4_dc_128_c( pixel *src )
3714 {
3715- PREDICT_4x4_DC( PIXEL_SPLAT_X4( 0x80 ) );
3716+ PREDICT_4x4_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
3717 }
3718 static void x264_predict_4x4_dc_left_c( pixel *src )
3719 {
3720@@ -491,7 +491,8 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
3721 }
3722 else
3723 {
3724- M64( edge+24 ) = SRC(7,-1) * 0x0101010101010101ULL;
3725+ MPIXEL_X4( edge+24 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
3726+ MPIXEL_X4( edge+28 ) = PIXEL_SPLAT_X4( SRC(7,-1) );
3727 edge[32] = SRC(7,-1);
3728 }
3729 }
3730@@ -523,7 +524,7 @@ static void x264_predict_8x8_filter_c( pixel *src, pixel edge[33], int i_neighbo
3731
3732 static void x264_predict_8x8_dc_128_c( pixel *src, pixel edge[33] )
3733 {
3734- PREDICT_8x8_DC( PIXEL_SPLAT_X4( 0x80 ) );
3735+ PREDICT_8x8_DC( PIXEL_SPLAT_X4( 1 << (BIT_DEPTH-1) ) );
3736 }
3737 static void x264_predict_8x8_dc_left_c( pixel *src, pixel edge[33] )
3738 {
3739@@ -554,9 +555,13 @@ void x264_predict_8x8_h_c( pixel *src, pixel edge[33] )
3740 }
3741 void x264_predict_8x8_v_c( pixel *src, pixel edge[33] )
3742 {
3743- uint64_t top = M64( edge+16 );
3744+ pixel4 top[2] = { MPIXEL_X4( edge+16 ),
3745+ MPIXEL_X4( edge+20 ) };
3746 for( int y = 0; y < 8; y++ )
3747- M64( src+y*FDEC_STRIDE ) = top;
3748+ {
3749+ MPIXEL_X4( src+y*FDEC_STRIDE+0 ) = top[0];
3750+ MPIXEL_X4( src+y*FDEC_STRIDE+4 ) = top[1];
3751+ }
3752 }
3753 static void x264_predict_8x8_ddl_c( pixel *src, pixel edge[33] )
3754 {
3755diff --git a/common/quant.c b/common/quant.c
3756index ece52f9..a7b72cf 100644
3757--- a/common/quant.c
3758+++ b/common/quant.c
3759@@ -142,7 +142,7 @@ static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, uint16_t *offset, int
3760 for( int i = 1; i < size; i++ )
3761 {
3762 int level = dct[i];
3763- int sign = level>>15;
3764+ int sign = level>>31;
3765 level = (level+sign)^sign;
3766 sum[i] += level;
3767 level -= offset[i];
3768@@ -177,10 +177,7 @@ static int ALWAYS_INLINE x264_decimate_score_internal( dctcoef *dct, int i_max )
3769 int i_score = 0;
3770 int idx = i_max - 1;
3771
3772- /* Yes, dct[idx-1] is guaranteed to be 32-bit aligned. idx>=0 instead of 1 works correctly for the same reason */
3773- while( idx >= 0 && MDCT_X2( &dct[idx-1] ) == 0 )
3774- idx -= 2;
3775- if( idx >= 0 && dct[idx] == 0 )
3776+ while( idx >= 0 && dct[idx] == 0 )
3777 idx--;
3778 while( idx >= 0 )
3779 {
3780@@ -216,10 +213,7 @@ static int x264_decimate_score64( dctcoef *dct )
3781
3782 static int ALWAYS_INLINE x264_coeff_last_internal( dctcoef *l, int i_count )
3783 {
3784- int i_last;
3785- for( i_last = i_count-1; i_last >= 3; i_last -= 4 )
3786- if( M64( l+i_last-3 ) )
3787- break;
3788+ int i_last = i_count-1;
3789 while( i_last >= 0 && l[i_last] == 0 )
3790 i_last--;
3791 return i_last;
3792@@ -287,6 +281,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
3793 pf->coeff_level_run[ DCT_LUMA_AC] = x264_coeff_level_run15;
3794 pf->coeff_level_run[ DCT_LUMA_4x4] = x264_coeff_level_run16;
3795
3796+#if !X264_HIGH_BIT_DEPTH
3797 #if HAVE_MMX
3798 if( cpu&X264_CPU_MMX )
3799 {
3800@@ -425,6 +420,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
3801 pf->coeff_last[DCT_LUMA_8x8] = x264_coeff_last64_neon;
3802 }
3803 #endif
3804+#endif // !X264_HIGH_BIT_DEPTH
3805 pf->coeff_last[ DCT_LUMA_DC] = pf->coeff_last[DCT_LUMA_4x4];
3806 pf->coeff_last[DCT_CHROMA_AC] = pf->coeff_last[ DCT_LUMA_AC];
3807 pf->coeff_level_run[ DCT_LUMA_DC] = pf->coeff_level_run[DCT_LUMA_4x4];
3808diff --git a/common/set.c b/common/set.c
3809index 16cff8e..86f3854 100644
3810--- a/common/set.c
3811+++ b/common/set.c
3812@@ -78,6 +78,7 @@ int x264_cqm_init( x264_t *h )
3813 32 - 11, 32 - 21 };
3814 int max_qp_err = -1;
3815 int max_chroma_qp_err = -1;
3816+ int min_qp_err = QP_MAX+1;
3817
3818 for( int i = 0; i < 6; i++ )
3819 {
3820@@ -94,9 +95,9 @@ int x264_cqm_init( x264_t *h )
3821 }
3822 else
3823 {
3824- CHECKED_MALLOC( h-> quant4_mf[i], 52*size*sizeof(uint16_t) );
3825+ CHECKED_MALLOC( h-> quant4_mf[i], (QP_MAX+1)*size*sizeof(uint16_t) );
3826 CHECKED_MALLOC( h->dequant4_mf[i], 6*size*sizeof(int) );
3827- CHECKED_MALLOC( h->unquant4_mf[i], 52*size*sizeof(int) );
3828+ CHECKED_MALLOC( h->unquant4_mf[i], (QP_MAX+1)*size*sizeof(int) );
3829 }
3830
3831 for( j = (i<4 ? 0 : 4); j < i; j++ )
3832@@ -106,7 +107,7 @@ int x264_cqm_init( x264_t *h )
3833 if( j < i )
3834 h->quant4_bias[i] = h->quant4_bias[j];
3835 else
3836- CHECKED_MALLOC( h->quant4_bias[i], 52*size*sizeof(uint16_t) );
3837+ CHECKED_MALLOC( h->quant4_bias[i], (QP_MAX+1)*size*sizeof(uint16_t) );
3838 }
3839
3840 for( int q = 0; q < 6; q++ )
3841@@ -140,7 +141,7 @@ int x264_cqm_init( x264_t *h )
3842 quant8_mf[i_list][q][i] = DIV(def_quant8[q][i] * 16, h->pps->scaling_list[4+i_list][i]);
3843 }
3844 }
3845- for( int q = 0; q < 52; q++ )
3846+ for( int q = 0; q < QP_MAX+1; q++ )
3847 {
3848 int j;
3849 for( int i_list = 0; i_list < 4; i_list++ )
3850@@ -148,6 +149,11 @@ int x264_cqm_init( x264_t *h )
3851 {
3852 h->unquant4_mf[i_list][q][i] = (1ULL << (q/6 + 15 + 8)) / quant4_mf[i_list][q%6][i];
3853 h->quant4_mf[i_list][q][i] = j = SHIFT(quant4_mf[i_list][q%6][i], q/6 - 1);
3854+ if( !j )
3855+ {
3856+ min_qp_err = X264_MIN( min_qp_err, q );
3857+ continue;
3858+ }
3859 // round to nearest, unless that would cause the deadzone to be negative
3860 h->quant4_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
3861 if( j > 0xffff && q > max_qp_err && (i_list == CQM_4IY || i_list == CQM_4PY) )
3862@@ -161,6 +167,11 @@ int x264_cqm_init( x264_t *h )
3863 {
3864 h->unquant8_mf[i_list][q][i] = (1ULL << (q/6 + 16 + 8)) / quant8_mf[i_list][q%6][i];
3865 h->quant8_mf[i_list][q][i] = j = SHIFT(quant8_mf[i_list][q%6][i], q/6);
3866+ if( !j )
3867+ {
3868+ min_qp_err = X264_MIN( min_qp_err, q );
3869+ continue;
3870+ }
3871 h->quant8_bias[i_list][q][i] = X264_MIN( DIV(deadzone[i_list]<<10, j), (1<<15)/j );
3872 if( j > 0xffff && q > max_qp_err )
3873 max_qp_err = q;
3874@@ -179,6 +190,12 @@ int x264_cqm_init( x264_t *h )
3875 x264_log( h, X264_LOG_ERROR, "but min chroma QP is implied to be %d.\n", h->chroma_qp_table[h->param.rc.i_qp_min] );
3876 return -1;
3877 }
3878+ if( !h->mb.b_lossless && min_qp_err <= h->param.rc.i_qp_max )
3879+ {
3880+ x264_log( h, X264_LOG_ERROR, "Quantization underflow. Your CQM is incompatible with QP > %d,\n", min_qp_err-1 );
3881+ x264_log( h, X264_LOG_ERROR, "but max QP is implied to be %d.\n", h->param.rc.i_qp_max );
3882+ return -1;
3883+ }
3884 return 0;
3885 fail:
3886 x264_cqm_delete( h );
3887diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c
3888index 2dcd671..4bb5f33 100644
3889--- a/common/x86/mc-c.c
3890+++ b/common/x86/mc-c.c
3891@@ -125,6 +125,7 @@ PIXEL_AVG_WALL(sse2)
3892 PIXEL_AVG_WALL(sse2_misalign)
3893 PIXEL_AVG_WALL(cache64_ssse3)
3894
3895+#if !X264_HIGH_BIT_DEPTH
3896 #define PIXEL_AVG_WTAB(instr, name1, name2, name3, name4, name5)\
3897 static void (* const x264_pixel_avg_wtab_##instr[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =\
3898 {\
3899@@ -355,24 +356,28 @@ static void x264_plane_copy_mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i
3900 x264_plane_copy_core_mmxext( dst+i_dst, i_dst, src+i_src, i_src, (w+15)&~15, h-1 );
3901 }
3902 }
3903+#endif // !X264_HIGH_BIT_DEPTH
3904
3905 void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
3906 {
3907 if( !(cpu&X264_CPU_MMX) )
3908 return;
3909
3910+ pf->memcpy_aligned = x264_memcpy_aligned_mmx;
3911+ pf->memzero_aligned = x264_memzero_aligned_mmx;
3912+#if !X264_HIGH_BIT_DEPTH
3913 pf->copy_16x16_unaligned = x264_mc_copy_w16_mmx;
3914 pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
3915 pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
3916 pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
3917- pf->memcpy_aligned = x264_memcpy_aligned_mmx;
3918- pf->memzero_aligned = x264_memzero_aligned_mmx;
3919 pf->integral_init4v = x264_integral_init4v_mmx;
3920 pf->integral_init8v = x264_integral_init8v_mmx;
3921+#endif // !X264_HIGH_BIT_DEPTH
3922
3923 if( !(cpu&X264_CPU_MMXEXT) )
3924 return;
3925
3926+#if !X264_HIGH_BIT_DEPTH
3927 pf->mc_luma = mc_luma_mmxext;
3928 pf->get_ref = get_ref_mmxext;
3929 pf->mc_chroma = x264_mc_chroma_mmxext;
3930@@ -412,12 +417,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
3931 pf->frame_init_lowres_core = x264_frame_init_lowres_core_cache32_mmxext;
3932 }
3933 #endif
3934+#endif // !X264_HIGH_BIT_DEPTH
3935
3936 if( !(cpu&X264_CPU_SSE2) )
3937 return;
3938
3939 pf->memcpy_aligned = x264_memcpy_aligned_sse2;
3940 pf->memzero_aligned = x264_memzero_aligned_sse2;
3941+#if !X264_HIGH_BIT_DEPTH
3942 pf->integral_init4v = x264_integral_init4v_sse2;
3943 pf->integral_init8v = x264_integral_init8v_sse2;
3944 pf->hpel_filter = x264_hpel_filter_sse2_amd;
3945@@ -492,4 +499,5 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
3946
3947 pf->integral_init4h = x264_integral_init4h_sse4;
3948 pf->integral_init8h = x264_integral_init8h_sse4;
3949+#endif // !X264_HIGH_BIT_DEPTH
3950 }
3951diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
3952index e771431..4004265 100644
3953--- a/common/x86/predict-c.c
3954+++ b/common/x86/predict-c.c
3955@@ -75,6 +75,7 @@
3956 void x264_predict_16x16_v_sse2( uint8_t *src );
3957 void x264_predict_16x16_p_core_sse2( uint8_t *src, int i00, int b, int c );
3958
3959+#if !X264_HIGH_BIT_DEPTH
3960 ALIGNED_8( static const int8_t pb_12345678[8] ) = {1,2,3,4,5,6,7,8};
3961 ALIGNED_8( static const int8_t pb_m87654321[8] ) = {-8,-7,-6,-5,-4,-3,-2,-1};
3962 ALIGNED_8( static const int8_t pb_m32101234[8] ) = {-3,-2,-1,0,1,2,3,4};
3963@@ -364,6 +365,7 @@ INTRA_SA8D_X3(ssse3)
3964 #else
3965 INTRA_SA8D_X3(mmxext)
3966 #endif
3967+#endif // !X264_HIGH_BIT_DEPTH
3968
3969 /****************************************************************************
3970 * Exported functions:
3971@@ -372,6 +374,7 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
3972 {
3973 if( !(cpu&X264_CPU_MMX) )
3974 return;
3975+#if !X264_HIGH_BIT_DEPTH
3976 pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx;
3977 if( !(cpu&X264_CPU_MMXEXT) )
3978 return;
3979@@ -397,12 +400,14 @@ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
3980 #ifdef __GNUC__
3981 pf[I_PRED_16x16_P] = x264_predict_16x16_p_ssse3;
3982 #endif
3983+#endif // !X264_HIGH_BIT_DEPTH
3984 }
3985
3986 void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
3987 {
3988 if( !(cpu&X264_CPU_MMX) )
3989 return;
3990+#if !X264_HIGH_BIT_DEPTH
3991 #if ARCH_X86_64
3992 pf[I_PRED_CHROMA_DC_LEFT] = x264_predict_8x8c_dc_left;
3993 #endif
3994@@ -424,12 +429,14 @@ void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
3995 #ifdef __GNUC__
3996 pf[I_PRED_CHROMA_P] = x264_predict_8x8c_p_ssse3;
3997 #endif
3998+#endif // !X264_HIGH_BIT_DEPTH
3999 }
4000
4001 void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter )
4002 {
4003 if( !(cpu&X264_CPU_MMXEXT) )
4004 return;
4005+#if !X264_HIGH_BIT_DEPTH
4006 pf[I_PRED_8x8_V] = x264_predict_8x8_v_mmxext;
4007 pf[I_PRED_8x8_H] = x264_predict_8x8_h_mmxext;
4008 pf[I_PRED_8x8_DC] = x264_predict_8x8_dc_mmxext;
4009@@ -456,12 +463,14 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
4010 pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
4011 pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
4012 *predict_8x8_filter = x264_predict_8x8_filter_ssse3;
4013+#endif // !X264_HIGH_BIT_DEPTH
4014 }
4015
4016 void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
4017 {
4018 if( !(cpu&X264_CPU_MMXEXT) )
4019 return;
4020+#if !X264_HIGH_BIT_DEPTH
4021 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmxext;
4022 pf[I_PRED_4x4_DDL] = x264_predict_4x4_ddl_mmxext;
4023 pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_mmxext;
4024@@ -474,4 +483,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
4025 pf[I_PRED_4x4_DDR] = x264_predict_4x4_ddr_ssse3;
4026 pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_ssse3;
4027 pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_ssse3;
4028+#endif // !X264_HIGH_BIT_DEPTH
4029 }
4030diff --git a/configure b/configure
4031index 24d15ad..43fbe39 100755
4032--- a/configure
4033+++ b/configure
4034@@ -18,6 +18,7 @@ echo " --enable-gprof adds -pg, doesn't strip"
4035 echo " --enable-visualize enables visualization (X11 only)"
4036 echo " --enable-pic build position-independent code"
4037 echo " --enable-shared build libx264.so"
4038+echo " --bit-depth=BIT_DEPTH sets output bit depth (8-10), default 8"
4039 echo " --extra-asflags=EASFLAGS add EASFLAGS to ASFLAGS"
4040 echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS"
4041 echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS"
4042@@ -124,6 +125,7 @@ gprof="no"
4043 pic="no"
4044 vis="no"
4045 shared="no"
4046+bit_depth="8"
4047
4048 CFLAGS="$CFLAGS -Wall -I."
4049 LDFLAGS="$LDFLAGS"
4050@@ -208,6 +210,14 @@ for opt do
4051 CFLAGS="$CFLAGS --sysroot=${opt#--sysroot=}"
4052 LDFLAGS="$LDFLAGS --sysroot=${opt#--sysroot=}"
4053 ;;
4054+ --bit-depth=*)
4055+ bit_depth="${opt#--bit-depth=}" # matched literally below so non-numeric or empty values are rejected too
4056+ case "$bit_depth" in
4057+ 8|9|10) ;;
4058+ *) echo "Supplied bit depth must be in range [8,10]."
4059+ exit 1 ;;
4060+ esac
4061+ ;;
4062 *)
4063 echo "Unknown option $opt, ignored"
4064 ;;
4065@@ -644,6 +654,12 @@ if cc_check '' -Wshadow ; then
4066 CFLAGS="-Wshadow $CFLAGS"
4067 fi
4068
4069+if [ "$bit_depth" -gt "8" ]; then
4070+ define X264_HIGH_BIT_DEPTH
4071+fi
4072+
4073+define BIT_DEPTH $bit_depth
4074+
4075 rm -f conftest*
4076
4077 # generate config files
4078@@ -724,6 +740,7 @@ gprof: $gprof
4079 PIC: $pic
4080 shared: $shared
4081 visualize: $vis
4082+bit depth: $bit_depth
4083 EOF
4084
4085 echo >> config.log
4086diff --git a/encoder/analyse.c b/encoder/analyse.c
4087index cdbdd1e..93f7eed 100644
4088--- a/encoder/analyse.c
4089+++ b/encoder/analyse.c
4090@@ -134,25 +134,27 @@ typedef struct
4091 } x264_mb_analysis_t;
4092
4093 /* lambda = pow(2,qp/6-2) */
4094-const uint8_t x264_lambda_tab[52] = {
4095- 1, 1, 1, 1, 1, 1, 1, 1, /* 0-7 */
4096- 1, 1, 1, 1, /* 8-11 */
4097- 1, 1, 1, 1, 2, 2, 2, 2, /* 12-19 */
4098- 3, 3, 3, 4, 4, 4, 5, 6, /* 20-27 */
4099- 6, 7, 8, 9,10,11,13,14, /* 28-35 */
4100- 16,18,20,23,25,29,32,36, /* 36-43 */
4101- 40,45,51,57,64,72,81,91 /* 44-51 */
4102+const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
4103+ 1, 1, 1, 1, 1, 1, 1, 1, /* 0- 7 */
4104+ 1, 1, 1, 1, 1, 1, 1, 1, /* 8-15 */
4105+ 2, 2, 2, 2, 3, 3, 3, 4, /* 16-23 */
4106+ 4, 4, 5, 6, 6, 7, 8, 9, /* 24-31 */
4107+ 10, 11, 13, 14, 16, 18, 20, 23, /* 32-39 */
4108+ 25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */
4109+ 64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */
4110+ 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
4111 };
4112
4113 /* lambda2 = pow(lambda,2) * .9 * 256 */
4114-const int x264_lambda2_tab[52] = {
4115- 14, 18, 22, 28, 36, 45, 57, 72, /* 0 - 7 */
4116- 91, 115, 145, 182, 230, 290, 365, 460, /* 8 - 15 */
4117- 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16 - 23 */
4118- 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24 - 31 */
4119- 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32 - 39 */
4120-148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40 - 47 */
4121-943718, 1189010, 1498059, 1887436 /* 48 - 51 */
4122+const int x264_lambda2_tab[QP_MAX_MAX+1] = {
4123+ 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
4124+ 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
4125+ 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
4126+ 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
4127+ 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
4128+ 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
4129+ 943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
4130+5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
4131 };
4132
4133 const uint8_t x264_exp2_lut[64] = {
4134@@ -188,27 +190,31 @@ const float x264_log2_lz_lut[32] = {
4135
4136 // should the intra and inter lambdas be different?
4137 // I'm just matching the behaviour of deadzone quant.
4138-static const int x264_trellis_lambda2_tab[2][52] = {
4139+static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
4140 // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
4141- { 46, 58, 73, 92, 117, 147,
4142- 185, 233, 294, 370, 466, 587,
4143- 740, 932, 1174, 1480, 1864, 2349,
4144- 2959, 3728, 4697, 5918, 7457, 9395,
4145- 11837, 14914, 18790, 23674, 29828, 37581,
4146- 47349, 59656, 75163, 94699, 119313, 150326,
4147- 189399, 238627, 300652, 378798, 477255, 601304,
4148- 757596, 954511, 1202608, 1515192, 1909022, 2405217,
4149- 3030384, 3818045, 4810435, 6060769 },
4150+ { 46, 58, 73, 92, 117, 147,
4151+ 185, 233, 294, 370, 466, 587,
4152+ 740, 932, 1174, 1480, 1864, 2349,
4153+ 2959, 3728, 4697, 5918, 7457, 9395,
4154+ 11837, 14914, 18790, 23674, 29828, 37581,
4155+ 47349, 59656, 75163, 94699, 119313, 150326,
4156+ 189399, 238627, 300652, 378798, 477255, 601304,
4157+ 757596, 954511, 1202608, 1515192, 1909022, 2405217,
4158+ 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
4159+ 12121539,15272182,19241743,24243077,30544363,38483486,
4160+ 48486154,61088726,76966972,96972308 },
4161 // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
4162- { 27, 34, 43, 54, 68, 86,
4163- 108, 136, 172, 216, 273, 343,
4164- 433, 545, 687, 865, 1090, 1374,
4165- 1731, 2180, 2747, 3461, 4361, 5494,
4166- 6922, 8721, 10988, 13844, 17442, 21976,
4167- 27688, 34885, 43953, 55377, 69771, 87906,
4168- 110755, 139543, 175813, 221511, 279087, 351627,
4169- 443023, 558174, 703255, 886046, 1116348, 1406511,
4170- 1772093, 2232697, 2813022, 3544186 }
4171+ { 27, 34, 43, 54, 68, 86,
4172+ 108, 136, 172, 216, 273, 343,
4173+ 433, 545, 687, 865, 1090, 1374,
4174+ 1731, 2180, 2747, 3461, 4361, 5494,
4175+ 6922, 8721, 10988, 13844, 17442, 21976,
4176+ 27688, 34885, 43953, 55377, 69771, 87906,
4177+ 110755, 139543, 175813, 221511, 279087, 351627,
4178+ 443023, 558174, 703255, 886046, 1116348, 1406511,
4179+ 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
4180+ 7088374, 8930791,11252092,14176748,17861583,22504184,
4181+ 28353495,35723165,45008368,56706990 }
4182 };
4183
4184 static const uint16_t x264_chroma_lambda2_offset_tab[] = {
4185@@ -237,7 +243,7 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
4186
4187 static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
4188
4189-static uint16_t x264_cost_ref[92][3][33];
4190+static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
4191 static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
4192
4193 int x264_analyse_init_costs( x264_t *h, int qp )
4194@@ -275,7 +281,7 @@ fail:
4195
4196 void x264_analyse_free_costs( x264_t *h )
4197 {
4198- for( int i = 0; i < 92; i++ )
4199+ for( int i = 0; i < LAMBDA_MAX+1; i++ )
4200 {
4201 if( h->cost_mv[i] )
4202 x264_free( h->cost_mv[i] - 2*4*2048 );
4203diff --git a/encoder/cabac.c b/encoder/cabac.c
4204index 8bd40f1..e82d7e9 100644
4205--- a/encoder/cabac.c
4206+++ b/encoder/cabac.c
4207@@ -262,9 +262,9 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
4208 if( i_dqp != 0 )
4209 {
4210 int val = i_dqp <= 0 ? (-2*i_dqp) : (2*i_dqp - 1);
4211- /* dqp is interpreted modulo 52 */
4212- if( val >= 51 && val != 52 )
4213- val = 103 - val;
4214+ /* dqp is interpreted modulo (QP_MAX+1) */
4215+ if( val >= QP_MAX && val != QP_MAX+1 )
4216+ val = 2*QP_MAX+1 - val;
4217 do
4218 {
4219 x264_cabac_encode_decision( cb, 60 + ctx, 1 );
4220@@ -767,15 +767,18 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
4221 i_mb_pos_tex = x264_cabac_pos( cb );
4222 h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
4223
4224- memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
4225- cb->p += 256;
4226- for( int i = 0; i < 8; i++ )
4227- memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
4228- cb->p += 64;
4229- for( int i = 0; i < 8; i++ )
4230- memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
4231- cb->p += 64;
4232+ bs_t s;
4233+ bs_init( &s, cb->p, cb->p_end - cb->p );
4234
4235+ for( int i = 0; i < 256; i++ )
4236+ bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
4237+ for( int ch = 1; ch < 3; ch++ )
4238+ for( int i = 0; i < 8; i++ )
4239+ for( int j = 0; j < 8; j++ )
4240+ bs_write( &s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
4241+
4242+ bs_flush( &s );
4243+ cb->p = s.p;
4244 x264_cabac_encode_init_core( cb );
4245
4246 h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
4247diff --git a/encoder/cavlc.c b/encoder/cavlc.c
4248index e2f60b1..632ed41 100644
4249--- a/encoder/cavlc.c
4250+++ b/encoder/cavlc.c
4251@@ -66,7 +66,7 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_len
4252 bs_t *s = &h->out.bs;
4253 static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
4254 int i_level_prefix = 15;
4255- int mask = level >> 15;
4256+ int mask = level >> 31;
4257 int abs_level = (level^mask)-mask;
4258 int i_level_code = abs_level*2-mask-2;
4259 if( ( i_level_code >> i_suffix_length ) < 15 )
4260@@ -219,10 +219,10 @@ static void cavlc_qp_delta( x264_t *h )
4261
4262 if( i_dqp )
4263 {
4264- if( i_dqp < -26 )
4265- i_dqp += 52;
4266- else if( i_dqp > 25 )
4267- i_dqp -= 52;
4268+ if( i_dqp < -(QP_MAX+1)/2 )
4269+ i_dqp += QP_MAX+1;
4270+ else if( i_dqp > QP_MAX/2 )
4271+ i_dqp -= QP_MAX+1;
4272 }
4273 bs_write_se( s, i_dqp );
4274 }
4275@@ -309,14 +309,12 @@ void x264_macroblock_write_cavlc( x264_t *h )
4276
4277 bs_align_0( s );
4278
4279- memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
4280- s->p += 256;
4281- for( int i = 0; i < 8; i++ )
4282- memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
4283- s->p += 64;
4284- for( int i = 0; i < 8; i++ )
4285- memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
4286- s->p += 64;
4287+ for( int i = 0; i < 256; i++ )
4288+ bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[0][i] );
4289+ for( int ch = 1; ch < 3; ch++ )
4290+ for( int i = 0; i < 8; i++ )
4291+ for( int j = 0; j < 8; j++ )
4292+ bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );
4293
4294 bs_init( s, s->p, s->p_end - s->p );
4295 s->p_start = p_start;
4296diff --git a/encoder/encoder.c b/encoder/encoder.c
4297index 31cb84a..f7e0e38 100644
4298--- a/encoder/encoder.c
4299+++ b/encoder/encoder.c
4300@@ -51,7 +51,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
4301 ****************************************************************************/
4302 static float x264_psnr( int64_t i_sqe, int64_t i_size )
4303 {
4304- double f_mse = (double)i_sqe / ((double)65025.0 * (double)i_size);
4305+ double f_mse = (double)i_sqe / (PIXEL_MAX*PIXEL_MAX * (double)i_size);
4306 if( f_mse <= 0.0000000001 ) /* Max 100dB */
4307 return 100;
4308
4309@@ -68,11 +68,13 @@ static void x264_frame_dump( x264_t *h )
4310 FILE *f = fopen( h->param.psz_dump_yuv, "r+b" );
4311 if( !f )
4312 return;
4313+ int bytes_per_pixel = (BIT_DEPTH+7)/8;
4314 /* Write the frame in display order */
4315- fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2, SEEK_SET );
4316+ fseek( f, (uint64_t)h->fdec->i_frame * h->param.i_height * h->param.i_width * 3/2 * bytes_per_pixel, SEEK_SET );
4317 for( int i = 0; i < h->fdec->i_plane; i++ )
4318 for( int y = 0; y < h->param.i_height >> !!i; y++ )
4319- fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]], 1, h->param.i_width >> !!i, f );
4320+ for( int j = 0; j < h->param.i_width >> !!i; j++ )
4321+ fwrite( &h->fdec->plane[i][y*h->fdec->i_stride[i]]+j, bytes_per_pixel, 1, f );
4322 fclose( f );
4323 }
4324
4325@@ -469,8 +471,8 @@ static int x264_validate_parameters( x264_t *h )
4326 x264_log( h, X264_LOG_ERROR, "no ratecontrol method specified\n" );
4327 return -1;
4328 }
4329- h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, 51 );
4330- h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
4331+ h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, 0, QP_MAX );
4332+ h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
4333 if( h->param.rc.i_rc_method == X264_RC_CRF )
4334 {
4335 h->param.rc.i_qp_constant = h->param.rc.f_rf_constant;
4336@@ -502,12 +504,12 @@ static int x264_validate_parameters( x264_t *h )
4337 float qp_p = h->param.rc.i_qp_constant;
4338 float qp_i = qp_p - 6*log2f( h->param.rc.f_ip_factor );
4339 float qp_b = qp_p + 6*log2f( h->param.rc.f_pb_factor );
4340- h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
4341- h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
4342+ h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, QP_MAX );
4343+ h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, QP_MAX );
4344 h->param.rc.i_aq_mode = 0;
4345 h->param.rc.b_mb_tree = 0;
4346 }
4347- h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
4348+ h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
4349 h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
4350 if( h->param.rc.i_vbv_buffer_size )
4351 {
4352@@ -1054,8 +1056,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
4353 if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
4354 goto fail;
4355
4356+ static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
4357 /* Checks for known miscompilation issues. */
4358- if( h->cost_mv[1][2013] != 24 )
4359+ if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
4360 {
4361 x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
4362 goto fail;
4363@@ -1147,11 +1150,22 @@ x264_t *x264_encoder_open( x264_param_t *param )
4364 fclose( f );
4365 }
4366
4367- x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
4368- h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
4369- h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
4370- h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
4371- "High 4:4:4 Predictive", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
4372+ const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
4373+ h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
4374+ h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
4375+ h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
4376+ "High 4:4:4 Predictive";
4377+
4378+ if( h->sps->i_profile_idc < PROFILE_HIGH10 )
4379+ {
4380+ x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d\n",
4381+ profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
4382+ }
4383+ else
4384+ {
4385+ x264_log( h, X264_LOG_INFO, "profile %s, level %d.%d, bit depth %d\n",
4386+ profile, h->sps->i_level_idc/10, h->sps->i_level_idc%10, BIT_DEPTH );
4387+ }
4388
4389 return h;
4390 fail:
4391@@ -1836,7 +1850,7 @@ static int x264_slice_write( x264_t *h )
4392 bs_align_1( &h->out.bs );
4393
4394 /* init cabac */
4395- x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc );
4396+ x264_cabac_context_init( &h->cabac, h->sh.i_type, x264_clip3( h->sh.i_qp-QP_BD_OFFSET, 0, 51 ), h->sh.i_cabac_init_idc );
4397 x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
4398 }
4399 h->mb.i_last_qp = h->sh.i_qp;
4400@@ -2705,6 +2719,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
4401 for( int i = 0; i < 3; i++ )
4402 {
4403 pic_out->img.i_stride[i] = h->fdec->i_stride[i];
4404+ // FIXME This breaks the API when pixel != uint8_t.
4405 pic_out->img.plane[i] = h->fdec->plane[i];
4406 }
4407
4408diff --git a/encoder/macroblock.h b/encoder/macroblock.h
4409index b1b02fa..7c83344 100644
4410--- a/encoder/macroblock.h
4411+++ b/encoder/macroblock.h
4412@@ -26,8 +26,8 @@
4413
4414 #include "common/macroblock.h"
4415
4416-extern const int x264_lambda2_tab[52];
4417-extern const uint8_t x264_lambda_tab[52];
4418+extern const int x264_lambda2_tab[QP_MAX_MAX+1];
4419+extern const uint16_t x264_lambda_tab[QP_MAX_MAX+1];
4420
4421 void x264_rdo_init( void );
4422
4423diff --git a/encoder/me.h b/encoder/me.h
4424index 912b05d..b125f3d 100644
4425--- a/encoder/me.h
4426+++ b/encoder/me.h
4427@@ -68,7 +68,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
4428 void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
4429 uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
4430
4431-extern uint16_t *x264_cost_mv_fpel[92][4];
4432+extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
4433
4434 #define COPY1_IF_LT(x,y)\
4435 if((y)<(x))\
4436diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
4437index 6fdaa98..7b6e5f9 100644
4438--- a/encoder/ratecontrol.c
4439+++ b/encoder/ratecontrol.c
4440@@ -219,7 +219,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
4441 uint32_t ssd = res >> 32;
4442 frame->i_pixel_sum[i] += sum;
4443 frame->i_pixel_ssd[i] += ssd;
4444- return ssd - (sum * sum >> shift);
4445+ return ssd - ((uint64_t)sum * sum >> shift);
4446 }
4447
4448 // Find the total AC energy of the block in all planes.
4449@@ -300,7 +300,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
4450 avg_adj /= h->mb.i_mb_count;
4451 avg_adj_pow2 /= h->mb.i_mb_count;
4452 strength = h->param.rc.f_aq_strength * avg_adj;
4453- avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - 14.f) / avg_adj;
4454+ avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (14.f * powf(1 << (BIT_DEPTH-8), 0.5f))) / avg_adj;
4455 }
4456 else
4457 strength = h->param.rc.f_aq_strength * 1.0397f;
4458@@ -318,7 +318,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
4459 else
4460 {
4461 uint32_t energy = x264_ac_energy_mb( h, mb_x, mb_y, frame );
4462- qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - 14.427f);
4463+ qp_adj = strength * (x264_log2( X264_MAX(energy, 1) ) - (14.427f + 2*(BIT_DEPTH-8)));
4464 }
4465 if( quant_offsets )
4466 qp_adj += quant_offsets[mb_xy];
4467@@ -620,8 +620,8 @@ int x264_ratecontrol_new( x264_t *h )
4468 rc->ip_offset = 6.0 * log2f( h->param.rc.f_ip_factor );
4469 rc->pb_offset = 6.0 * log2f( h->param.rc.f_pb_factor );
4470 rc->qp_constant[SLICE_TYPE_P] = h->param.rc.i_qp_constant;
4471- rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, 51 );
4472- rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, 51 );
4473+ rc->qp_constant[SLICE_TYPE_I] = x264_clip3( h->param.rc.i_qp_constant - rc->ip_offset + 0.5, 0, QP_MAX );
4474+ rc->qp_constant[SLICE_TYPE_B] = x264_clip3( h->param.rc.i_qp_constant + rc->pb_offset + 0.5, 0, QP_MAX );
4475 h->mb.ip_offset = rc->ip_offset + 0.5;
4476
4477 rc->lstep = pow( 2, h->param.rc.i_qp_step / 6.0 );
4478@@ -1231,7 +1231,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
4479
4480 rc->qpa_rc =
4481 rc->qpa_aq = 0;
4482- rc->qp = x264_clip3( (int)(q + 0.5), 0, 51 );
4483+ rc->qp = x264_clip3( (int)(q + 0.5), 0, QP_MAX );
4484 h->fdec->f_qp_avg_rc =
4485 h->fdec->f_qp_avg_aq =
4486 rc->qpm = q;
4487@@ -1416,9 +1416,9 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
4488 * So just calculate the average QP used so far. */
4489 h->param.rc.i_qp_constant = (h->stat.i_frame_count[SLICE_TYPE_P] == 0) ? 24
4490 : 1 + h->stat.f_frame_qp[SLICE_TYPE_P] / h->stat.i_frame_count[SLICE_TYPE_P];
4491- rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, 51 );
4492- rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, 51 );
4493- rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, 51 );
4494+ rc->qp_constant[SLICE_TYPE_P] = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX );
4495+ rc->qp_constant[SLICE_TYPE_I] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) / fabs( h->param.rc.f_ip_factor )) + 0.5 ), 0, QP_MAX );
4496+ rc->qp_constant[SLICE_TYPE_B] = x264_clip3( (int)( qscale2qp( qp2qscale( h->param.rc.i_qp_constant ) * fabs( h->param.rc.f_pb_factor )) + 0.5 ), 0, QP_MAX );
4497
4498 x264_log(h, X264_LOG_ERROR, "2nd pass has more frames than 1st pass (%d)\n", rc->num_entries);
4499 x264_log(h, X264_LOG_ERROR, "continuing anyway, at constant QP=%d\n", h->param.rc.i_qp_constant);
4500@@ -2652,7 +2652,7 @@ static int init_pass2( x264_t *h )
4501 }
4502 else if( expected_bits > all_available_bits && avgq > h->param.rc.i_qp_max - 2 )
4503 {
4504- if( h->param.rc.i_qp_max < 51 )
4505+ if( h->param.rc.i_qp_max < QP_MAX )
4506 x264_log( h, X264_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", h->param.rc.i_qp_max );
4507 else
4508 x264_log( h, X264_LOG_WARNING, "try increasing target bitrate\n");
4509diff --git a/encoder/rdo.c b/encoder/rdo.c
4510index afaa894..4fae811 100644
4511--- a/encoder/rdo.c
4512+++ b/encoder/rdo.c
4513@@ -443,10 +443,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
4514 /* We only need to zero an empty 4x4 block. 8x8 can be
4515 implicitly emptied via zero nnz, as can dc. */
4516 if( i_coefs == 16 && !dc )
4517- {
4518- M128( &dct[0] ) = M128_ZERO;
4519- M128( &dct[8] ) = M128_ZERO;
4520- }
4521+ memset( dct, 0, 16 * sizeof(dctcoef) );
4522 return 0;
4523 }
4524
4525@@ -613,10 +610,7 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, dctcoef *dct,
4526 if( bnode == &nodes_cur[0] )
4527 {
4528 if( i_coefs == 16 && !dc )
4529- {
4530- M128( &dct[0] ) = M128_ZERO;
4531- M128( &dct[8] ) = M128_ZERO;
4532- }
4533+ memset( dct, 0, 16 * sizeof(dctcoef) );
4534 return 0;
4535 }
4536
4537diff --git a/encoder/set.c b/encoder/set.c
4538index 9e6e736..55d6df7 100644
4539--- a/encoder/set.c
4540+++ b/encoder/set.c
4541@@ -104,6 +104,8 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
4542 sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
4543 if( sps->b_qpprime_y_zero_transform_bypass )
4544 sps->i_profile_idc = PROFILE_HIGH444_PREDICTIVE;
4545+ else if( BIT_DEPTH > 8 )
4546+ sps->i_profile_idc = PROFILE_HIGH10;
4547 else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT )
4548 sps->i_profile_idc = PROFILE_HIGH;
4549 else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 )
4550@@ -260,8 +262,8 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
4551 if( sps->i_profile_idc >= PROFILE_HIGH )
4552 {
4553 bs_write_ue( s, 1 ); // chroma_format_idc = 4:2:0
4554- bs_write_ue( s, 0 ); // bit_depth_luma_minus8
4555- bs_write_ue( s, 0 ); // bit_depth_chroma_minus8
4556+ bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
4557+ bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
4558 bs_write( s, 1, sps->b_qpprime_y_zero_transform_bypass );
4559 bs_write( s, 1, 0 ); // seq_scaling_matrix_present_flag
4560 }
4561@@ -488,7 +490,7 @@ void x264_pps_write( bs_t *s, x264_pps_t *pps )
4562 bs_write( s, 1, pps->b_weighted_pred );
4563 bs_write( s, 2, pps->b_weighted_bipred );
4564
4565- bs_write_se( s, pps->i_pic_init_qp - 26 );
4566+ bs_write_se( s, pps->i_pic_init_qp - 26 - QP_BD_OFFSET );
4567 bs_write_se( s, pps->i_pic_init_qs - 26 );
4568 bs_write_se( s, pps->i_chroma_qp_index_offset );
4569
4570diff --git a/encoder/slicetype.c b/encoder/slicetype.c
4571index 7d69b71..ad2a8c2 100644
4572--- a/encoder/slicetype.c
4573+++ b/encoder/slicetype.c
4574@@ -303,7 +303,7 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
4575 (mv1)[0], (mv1)[1], 8, 8, w ); \
4576 h->mc.avg[PIXEL_8x8]( pix1, 16, src1, stride1, src2, stride2, i_bipred_weight ); \
4577 } \
4578- i_cost = penalty + h->pixf.mbcmp[PIXEL_8x8]( \
4579+ i_cost = penalty * a->i_lambda + h->pixf.mbcmp[PIXEL_8x8]( \
4580 m[0].p_fenc[0], FENC_STRIDE, pix1, 16 ); \
4581 COPY2_IF_LT( i_bcost, i_cost, list_used, 3 ); \
4582 }
4583@@ -393,9 +393,9 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
4584 }
4585
4586 x264_me_search( h, &m[l], mvc, i_mvc );
4587- m[l].cost -= 2; // remove mvcost from skip mbs
4588+ m[l].cost -= 2 * a->i_lambda; // remove mvcost from skip mbs
4589 if( M32( m[l].mv ) )
4590- m[l].cost += 5;
4591+ m[l].cost += 5 * a->i_lambda;
4592
4593 skip_motionest:
4594 CP32( fenc_mvs[l], m[l].mv );
4595@@ -418,7 +418,7 @@ lowres_intra_mb:
4596 ALIGNED_ARRAY_16( pixel, edge,[33] );
4597 pixel *pix = &pix1[8+FDEC_STRIDE - 1];
4598 pixel *src = &fenc->lowres[0][i_pel_offset - 1];
4599- const int intra_penalty = 5;
4600+ const int intra_penalty = 5 * a->i_lambda;
4601 int satds[3];
4602
4603 memcpy( pix-FDEC_STRIDE, src-i_stride, 17 * sizeof(pixel) );
4604@@ -496,7 +496,7 @@ lowres_intra_mb:
4605 }
4606 }
4607
4608- fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = i_bcost + (list_used << LOWRES_COST_SHIFT);
4609+ fenc->lowres_costs[b-p0][p1-b][i_mb_xy] = X264_MIN( i_bcost, LOWRES_COST_MASK ) + (list_used << LOWRES_COST_SHIFT);
4610 }
4611 #undef TRY_BIDIR
4612
4613diff --git a/tools/checkasm.c b/tools/checkasm.c
4614index 7fa2c0c..a5ffa17 100644
4615--- a/tools/checkasm.c
4616+++ b/tools/checkasm.c
4617@@ -40,8 +40,10 @@
4618 uint8_t *buf1, *buf2;
4619 /* buf3, buf4: used to store output */
4620 uint8_t *buf3, *buf4;
4621-/* pbuf*: point to the same memory as above, just for type convenience */
4622-pixel *pbuf1, *pbuf2, *pbuf3, *pbuf4;
4623+/* pbuf1, pbuf2: initialised to random pixel data; tests must not write into them. */
4624+pixel *pbuf1, *pbuf2;
4625+/* pbuf3, pbuf4: point to buf3, buf4, just for type convenience */
4626+pixel *pbuf3, *pbuf4;
4627
4628 int quiet = 0;
4629
4630@@ -256,11 +258,15 @@ static int check_pixel( int cpu_ref, int cpu_new )
4631 int z = i|(i>>4);
4632 z ^= z>>2;
4633 z ^= z>>1;
4634- buf3[i] = ~(buf4[i] = -(z&1));
4635+ pbuf4[i] = -(z&1) & PIXEL_MAX;
4636+ pbuf3[i] = ~pbuf4[i] & PIXEL_MAX;
4637 }
4638 // random pattern made of maxed pixel differences, in case an intermediate value overflows
4639 for( int i = 256; i < 0x1000; i++ )
4640- buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
4641+ {
4642+ pbuf4[i] = -(pbuf1[i&~0x88]&1) & PIXEL_MAX;
4643+ pbuf3[i] = ~(pbuf4[i]) & PIXEL_MAX;
4644+ }
4645
4646 #define TEST_PIXEL( name, align ) \
4647 ok = 1, used_asm = 0; \
4648@@ -535,22 +541,22 @@ static int check_dct( int cpu_ref, int cpu_new )
4649 used_asm = 1; \
4650 call_c( dct_c.name, t1, pbuf1, pbuf2 ); \
4651 call_a( dct_asm.name, t2, pbuf1, pbuf2 ); \
4652- if( memcmp( t1, t2, size ) ) \
4653+ if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
4654 { \
4655 ok = 0; \
4656 fprintf( stderr, #name " [FAILED]\n" ); \
4657 } \
4658 }
4659 ok = 1; used_asm = 0;
4660- TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16*2 );
4661- TEST_DCT( sub8x8_dct, dct1, dct2, 16*2*4 );
4662- TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4*2 );
4663- TEST_DCT( sub16x16_dct, dct1, dct2, 16*2*16 );
4664+ TEST_DCT( sub4x4_dct, dct1[0], dct2[0], 16 );
4665+ TEST_DCT( sub8x8_dct, dct1, dct2, 16*4 );
4666+ TEST_DCT( sub8x8_dct_dc, dctdc[0], dctdc[1], 4 );
4667+ TEST_DCT( sub16x16_dct, dct1, dct2, 16*16 );
4668 report( "sub_dct4 :" );
4669
4670 ok = 1; used_asm = 0;
4671- TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64*2 );
4672- TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*2*4 );
4673+ TEST_DCT( sub8x8_dct8, (void*)dct1[0], (void*)dct2[0], 64 );
4674+ TEST_DCT( sub16x16_dct8, (void*)dct1, (void*)dct2, 64*4 );
4675 report( "sub_dct8 :" );
4676 #undef TEST_DCT
4677
4678@@ -574,13 +580,13 @@ static int check_dct( int cpu_ref, int cpu_new )
4679 { \
4680 set_func_name( #name ); \
4681 used_asm = 1; \
4682- memcpy( buf3, buf1, 32*32 * sizeof(pixel) ); \
4683- memcpy( buf4, buf1, 32*32 * sizeof(pixel) ); \
4684- memcpy( dct1, src, 512 * sizeof(pixel) ); \
4685- memcpy( dct2, src, 512 * sizeof(pixel) ); \
4686+ memcpy( pbuf3, pbuf1, 32*32 * sizeof(pixel) ); \
4687+ memcpy( pbuf4, pbuf1, 32*32 * sizeof(pixel) ); \
4688+ memcpy( dct1, src, 256 * sizeof(dctcoef) ); \
4689+ memcpy( dct2, src, 256 * sizeof(dctcoef) ); \
4690 call_c1( dct_c.name, pbuf3, (void*)dct1 ); \
4691 call_a1( dct_asm.name, pbuf4, (void*)dct2 ); \
4692- if( memcmp( buf3, buf4, 32*32 * sizeof(pixel) ) ) \
4693+ if( memcmp( pbuf3, pbuf4, 32*32 * sizeof(pixel) ) ) \
4694 { \
4695 ok = 0; \
4696 fprintf( stderr, #name " [FAILED]\n" ); \
4697@@ -615,10 +621,10 @@ static int check_dct( int cpu_ref, int cpu_new )
4698 dct1[0][j] = !i ? (j^j>>1^j>>2^j>>3)&1 ? 4080 : -4080 /* max dc */\
4699 : i<8 ? (*p++)&1 ? 4080 : -4080 /* max elements */\
4700 : ((*p++)&0x1fff)-0x1000; /* general case */\
4701- memcpy( dct2, dct1, 32 );\
4702+ memcpy( dct2, dct1, 16 * sizeof(dctcoef) );\
4703 call_c1( dct_c.name, dct1[0] );\
4704 call_a1( dct_asm.name, dct2[0] );\
4705- if( memcmp( dct1, dct2, 32 ) )\
4706+ if( memcmp( dct1, dct2, 16 * sizeof(dctcoef) ) )\
4707 ok = 0;\
4708 }\
4709 call_c2( dct_c.name, dct1[0] );\
4710@@ -658,11 +664,11 @@ static int check_dct( int cpu_ref, int cpu_new )
4711 int nz_a, nz_c; \
4712 set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
4713 used_asm = 1; \
4714- memcpy( buf3, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4715- memcpy( buf4, buf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4716+ memcpy( pbuf3, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4717+ memcpy( pbuf4, pbuf1, 16*FDEC_STRIDE * sizeof(pixel) ); \
4718 nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3 ); \
4719 nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4 ); \
4720- if( memcmp( t1, t2, size*sizeof(dctcoef) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
4721+ if( memcmp( t1, t2, size*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE*sizeof(pixel) ) || nz_c != nz_a ) \
4722 { \
4723 ok = 0; \
4724 fprintf( stderr, #name " [FAILED]\n" ); \
4725@@ -680,8 +686,8 @@ static int check_dct( int cpu_ref, int cpu_new )
4726 used_asm = 1; \
4727 for( int i = 0; i < 2; i++ ) \
4728 { \
4729- memcpy( buf3, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4730- memcpy( buf4, buf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4731+ memcpy( pbuf3, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4732+ memcpy( pbuf4, pbuf2, 16*FDEC_STRIDE * sizeof(pixel) ); \
4733 for( int j = 0; j < 4; j++ ) \
4734 { \
4735 memcpy( pbuf3 + j*FDEC_STRIDE, (i?pbuf1:pbuf2) + j*FENC_STRIDE, 4 * sizeof(pixel) ); \
4736@@ -689,7 +695,7 @@ static int check_dct( int cpu_ref, int cpu_new )
4737 } \
4738 nz_c = call_c1( zigzag_c.name, t1, pbuf2, pbuf3, &dc_c ); \
4739 nz_a = call_a1( zigzag_asm.name, t2, pbuf2, pbuf4, &dc_a ); \
4740- if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
4741+ if( memcmp( t1+1, t2+1, 15*sizeof(dctcoef) ) || memcmp( pbuf3, pbuf4, 16*FDEC_STRIDE * sizeof(pixel) ) || nz_c != nz_a || dc_c != dc_a ) \
4742 { \
4743 ok = 0; \
4744 fprintf( stderr, #name " [FAILED]\n" ); \
4745@@ -779,11 +785,11 @@ static int check_mc( int cpu_ref, int cpu_new )
4746 const x264_weight_t *weight = weight_none; \
4747 set_func_name( "mc_luma_%dx%d", w, h ); \
4748 used_asm = 1; \
4749- memset( buf3, 0xCD, 1024 ); \
4750- memset( buf4, 0xCD, 1024 ); \
4751+ for( int i = 0; i < 1024; i++ ) \
4752+ pbuf3[i] = pbuf4[i] = 0xCD; \
4753 call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
4754 call_a( mc_a.mc_luma, dst2, 32, src2, 64, dx, dy, w, h, weight ); \
4755- if( memcmp( buf3, buf4, 1024 ) ) \
4756+ if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
4757 { \
4758 fprintf( stderr, "mc_luma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
4759 ok = 0; \
4760@@ -796,8 +802,8 @@ static int check_mc( int cpu_ref, int cpu_new )
4761 const x264_weight_t *weight = weight_none; \
4762 set_func_name( "get_ref_%dx%d", w, h ); \
4763 used_asm = 1; \
4764- memset( buf3, 0xCD, 1024 ); \
4765- memset( buf4, 0xCD, 1024 ); \
4766+ for( int i = 0; i < 1024; i++ ) \
4767+ pbuf3[i] = pbuf4[i] = 0xCD; \
4768 call_c( mc_c.mc_luma, dst1, 32, src2, 64, dx, dy, w, h, weight ); \
4769 ref = (pixel*)call_a( mc_a.get_ref, ref, &ref_stride, src2, 64, dx, dy, w, h, weight ); \
4770 for( int i = 0; i < h; i++ ) \
4771@@ -814,15 +820,15 @@ static int check_mc( int cpu_ref, int cpu_new )
4772 { \
4773 set_func_name( "mc_chroma_%dx%d", w, h ); \
4774 used_asm = 1; \
4775- memset( buf3, 0xCD, 1024 ); \
4776- memset( buf4, 0xCD, 1024 ); \
4777+ for( int i = 0; i < 1024; i++ ) \
4778+ pbuf3[i] = pbuf4[i] = 0xCD; \
4779 call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
4780 call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
4781 /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
4782 for( int j = 0; j < h; j++ ) \
4783 for( int i = w; i < 4; i++ ) \
4784 dst2[i+j*16] = dst1[i+j*16]; \
4785- if( memcmp( buf3, buf4, 1024 ) ) \
4786+ if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
4787 { \
4788 fprintf( stderr, "mc_chroma[mv(%d,%d) %2dx%-2d] [FAILED]\n", dx, dy, w, h ); \
4789 ok = 0; \
4790@@ -867,15 +873,15 @@ static int check_mc( int cpu_ref, int cpu_new )
4791 ok = 1, used_asm = 0; \
4792 for( int i = 0; i < 10; i++ ) \
4793 { \
4794- memcpy( buf3, pbuf1+320, 320 * sizeof(pixel) ); \
4795- memcpy( buf4, pbuf1+320, 320 * sizeof(pixel) ); \
4796+ memcpy( pbuf3, pbuf1+320, 320 * sizeof(pixel) ); \
4797+ memcpy( pbuf4, pbuf1+320, 320 * sizeof(pixel) ); \
4798 if( mc_a.name[i] != mc_ref.name[i] ) \
4799 { \
4800 set_func_name( "%s_%s", #name, pixel_names[i] ); \
4801 used_asm = 1; \
4802 call_c1( mc_c.name[i], pbuf3, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
4803 call_a1( mc_a.name[i], pbuf4, 16, pbuf2+1, 16, pbuf1+18, 16, weight ); \
4804- if( memcmp( buf3, buf4, 320 * sizeof(pixel) ) ) \
4805+ if( memcmp( pbuf3, pbuf4, 320 * sizeof(pixel) ) ) \
4806 { \
4807 ok = 0; \
4808 fprintf( stderr, #name "[%d]: [FAILED]\n", i ); \
4809@@ -971,8 +977,8 @@ static int check_mc( int cpu_ref, int cpu_new )
4810 void *tmp = pbuf3+49*64;
4811 set_func_name( "hpel_filter" );
4812 ok = 1; used_asm = 1;
4813- memset( buf3, 0, 4096 * sizeof(pixel) );
4814- memset( buf4, 0, 4096 * sizeof(pixel) );
4815+ memset( pbuf3, 0, 4096 * sizeof(pixel) );
4816+ memset( pbuf4, 0, 4096 * sizeof(pixel) );
4817 call_c( mc_c.hpel_filter, dstc[0], dstc[1], dstc[2], srchpel, 64, 48, 10, tmp );
4818 call_a( mc_a.hpel_filter, dsta[0], dsta[1], dsta[2], srchpel, 64, 48, 10, tmp );
4819 for( int i = 0; i < 3; i++ )
4820@@ -1030,13 +1036,13 @@ static int check_mc( int cpu_ref, int cpu_new )
4821 int stride = 80;\
4822 set_func_name( #name );\
4823 used_asm = 1;\
4824- memcpy( buf3, buf1, size*2*stride * sizeof(pixel) );\
4825- memcpy( buf4, buf1, size*2*stride * sizeof(pixel) );\
4826- uint16_t *sum = (uint16_t*)buf3;\
4827+ memcpy( pbuf3, pbuf1, size*2*stride * sizeof(pixel) );\
4828+ memcpy( pbuf4, pbuf1, size*2*stride * sizeof(pixel) );\
4829+ uint16_t *sum = (uint16_t*)pbuf3;\
4830 call_c1( mc_c.name, __VA_ARGS__ );\
4831- sum = (uint16_t*)buf4;\
4832+ sum = (uint16_t*)pbuf4;\
4833 call_a1( mc_a.name, __VA_ARGS__ );\
4834- if( memcmp( buf3, buf4, (stride-8)*2 * sizeof(pixel) )\
4835+ if( memcmp( pbuf3, pbuf4, (stride-8)*2 * sizeof(pixel) )\
4836 || (size>9 && memcmp( pbuf3+18*stride, pbuf4+18*stride, (stride-8)*2 * sizeof(pixel) )))\
4837 ok = 0;\
4838 call_c2( mc_c.name, __VA_ARGS__ );\
4839@@ -1096,11 +1102,11 @@ static int check_deblock( int cpu_ref, int cpu_new )
4840 /* not exactly the real values of a,b,tc but close enough */
4841 for( int i = 35, a = 255, c = 250; i >= 0; i-- )
4842 {
4843- alphas[i] = a;
4844- betas[i] = (i+1)/2;
4845- tcs[i][0] = tcs[i][3] = (c+6)/10;
4846- tcs[i][1] = (c+7)/15;
4847- tcs[i][2] = (c+9)/20;
4848+ alphas[i] = a << (BIT_DEPTH-8);
4849+ betas[i] = (i+1)/2 << (BIT_DEPTH-8);
4850+ tcs[i][0] = tcs[i][3] = (c+6)/10 << (BIT_DEPTH-8);
4851+ tcs[i][1] = (c+7)/15 << (BIT_DEPTH-8);
4852+ tcs[i][2] = (c+9)/20 << (BIT_DEPTH-8);
4853 a = a*9/10;
4854 c = c*9/10;
4855 }
4856@@ -1111,15 +1117,15 @@ static int check_deblock( int cpu_ref, int cpu_new )
4857 int off = 8*32 + (i&15)*4*!align; /* benchmark various alignments of h filter */ \
4858 for( int j = 0; j < 1024; j++ ) \
4859 /* two distributions of random to excersize different failure modes */ \
4860- buf3[j] = rand() & (i&1 ? 0xf : 0xff ); \
4861- memcpy( buf4, buf3, 1024 * sizeof(pixel) ); \
4862+ pbuf3[j] = rand() & (i&1 ? 0xf : PIXEL_MAX ); \
4863+ memcpy( pbuf4, pbuf3, 1024 * sizeof(pixel) ); \
4864 if( db_a.name != db_ref.name ) \
4865 { \
4866 set_func_name( #name ); \
4867 used_asm = 1; \
4868 call_c1( db_c.name, pbuf3+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
4869 call_a1( db_a.name, pbuf4+off, 32, alphas[i], betas[i], ##__VA_ARGS__ ); \
4870- if( memcmp( buf3, buf4, 1024 * sizeof(pixel) ) ) \
4871+ if( memcmp( pbuf3, pbuf4, 1024 * sizeof(pixel) ) ) \
4872 { \
4873 ok = 0; \
4874 fprintf( stderr, #name "(a=%d, b=%d): [FAILED]\n", alphas[i], betas[i] ); \
4875@@ -1200,7 +1206,7 @@ static int check_quant( int cpu_ref, int cpu_new )
4876 h->pps = h->pps_array;
4877 x264_param_default( &h->param );
4878 h->chroma_qp_table = i_chroma_qp_table + 12;
4879- h->param.rc.i_qp_min = 26;
4880+ h->param.rc.i_qp_min = 26 + QP_BD_OFFSET;
4881 h->param.analyse.b_transform_8x8 = 1;
4882
4883 for( int i_cqm = 0; i_cqm < 4; i_cqm++ )
4884@@ -1219,9 +1225,10 @@ static int check_quant( int cpu_ref, int cpu_new )
4885 }
4886 else
4887 {
4888+ int max_scale = BIT_DEPTH < 10 ? 255 : 228;
4889 if( i_cqm == 2 )
4890 for( int i = 0; i < 64; i++ )
4891- cqm_buf[i] = 10 + rand() % 246;
4892+ cqm_buf[i] = 10 + rand() % (max_scale - 9);
4893 else
4894 for( int i = 0; i < 64; i++ )
4895 cqm_buf[i] = 1;
4896@@ -1260,7 +1267,7 @@ static int check_quant( int cpu_ref, int cpu_new )
4897 { \
4898 set_func_name( #name ); \
4899 used_asms[0] = 1; \
4900- for( int qp = 51; qp > 0; qp-- ) \
4901+ for( int qp = QP_MAX; qp > 0; qp-- ) \
4902 { \
4903 for( int j = 0; j < 2; j++ ) \
4904 { \
4905@@ -1269,7 +1276,7 @@ static int check_quant( int cpu_ref, int cpu_new )
4906 dct1[i] = dct2[i] = j ? (rand() & 0x1fff) - 0xfff : 0; \
4907 result_c = call_c1( qf_c.name, dct1, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
4908 result_a = call_a1( qf_a.name, dct2, h->quant4_mf[CQM_4IY][qp][0], h->quant4_bias[CQM_4IY][qp][0] ); \
4909- if( memcmp( dct1, dct2, 16*2 ) || result_c != result_a ) \
4910+ if( memcmp( dct1, dct2, 16*sizeof(dctcoef) ) || result_c != result_a ) \
4911 { \
4912 oks[0] = 0; \
4913 fprintf( stderr, #name "(cqm=%d): [FAILED]\n", i_cqm ); \
4914@@ -1286,14 +1293,14 @@ static int check_quant( int cpu_ref, int cpu_new )
4915 { \
4916 set_func_name( #qname ); \
4917 used_asms[0] = 1; \
4918- for( int qp = 51; qp > 0; qp-- ) \
4919+ for( int qp = QP_MAX; qp > 0; qp-- ) \
4920 { \
4921 for( int j = 0; j < 2; j++ ) \
4922 { \
4923 INIT_QUANT##w(j) \
4924 int result_c = call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
4925 int result_a = call_a1( qf_a.qname, dct2, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
4926- if( memcmp( dct1, dct2, w*w*2 ) || result_c != result_a ) \
4927+ if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) || result_c != result_a ) \
4928 { \
4929 oks[0] = 0; \
4930 fprintf( stderr, #qname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
4931@@ -1317,14 +1324,14 @@ static int check_quant( int cpu_ref, int cpu_new )
4932 { \
4933 set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
4934 used_asms[1] = 1; \
4935- for( int qp = 51; qp > 0; qp-- ) \
4936+ for( int qp = QP_MAX; qp > 0; qp-- ) \
4937 { \
4938 INIT_QUANT##w(1) \
4939 call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp], h->quant##w##_bias[block][qp] ); \
4940- memcpy( dct2, dct1, w*w*2 ); \
4941+ memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
4942 call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
4943 call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
4944- if( memcmp( dct1, dct2, w*w*2 ) ) \
4945+ if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
4946 { \
4947 oks[1] = 0; \
4948 fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
4949@@ -1345,15 +1352,15 @@ static int check_quant( int cpu_ref, int cpu_new )
4950 { \
4951 set_func_name( "%s_%s", #dqname, i_cqm?"cqm":"flat" ); \
4952 used_asms[1] = 1; \
4953- for( int qp = 51; qp > 0; qp-- ) \
4954+ for( int qp = QP_MAX; qp > 0; qp-- ) \
4955 { \
4956 for( int i = 0; i < 16; i++ ) \
4957 dct1[i] = rand(); \
4958 call_c1( qf_c.qname, dct1, h->quant##w##_mf[block][qp][0]>>1, h->quant##w##_bias[block][qp][0]>>1 ); \
4959- memcpy( dct2, dct1, w*w*2 ); \
4960+ memcpy( dct2, dct1, w*w*sizeof(dctcoef) ); \
4961 call_c1( qf_c.dqname, dct1, h->dequant##w##_mf[block], qp ); \
4962 call_a1( qf_a.dqname, dct2, h->dequant##w##_mf[block], qp ); \
4963- if( memcmp( dct1, dct2, w*w*2 ) ) \
4964+ if( memcmp( dct1, dct2, w*w*sizeof(dctcoef) ) ) \
4965 { \
4966 oks[1] = 0; \
4967 fprintf( stderr, #dqname "(qp=%d, cqm=%d, block="#block"): [FAILED]\n", qp, i_cqm ); \
4968@@ -1381,12 +1388,12 @@ static int check_quant( int cpu_ref, int cpu_new )
4969 for( int size = 16; size <= 64; size += 48 )
4970 {
4971 set_func_name( "denoise_dct" );
4972- memcpy( dct1, buf1, size*2 );
4973- memcpy( dct2, buf1, size*2 );
4974+ memcpy( dct1, buf1, size*sizeof(dctcoef) );
4975+ memcpy( dct2, buf1, size*sizeof(dctcoef) );
4976 memcpy( buf3+256, buf3, 256 );
4977 call_c1( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
4978 call_a1( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
4979- if( memcmp( dct1, dct2, size*2 ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
4980+ if( memcmp( dct1, dct2, size*sizeof(dctcoef) ) || memcmp( buf3+4, buf3+256+4, (size-1)*sizeof(uint32_t) ) )
4981 ok = 0;
4982 call_c2( qf_c.denoise_dct, dct1, (uint32_t*)buf3, (uint16_t*)buf2, size );
4983 call_a2( qf_a.denoise_dct, dct2, (uint32_t*)(buf3+256), (uint16_t*)buf2, size );
4984@@ -1431,7 +1438,7 @@ static int check_quant( int cpu_ref, int cpu_new )
4985 { \
4986 int nnz = 0; \
4987 int max = rand() & (w*w-1); \
4988- memset( dct1, 0, w*w*2 ); \
4989+ memset( dct1, 0, w*w*sizeof(dctcoef) ); \
4990 for( int idx = ac; idx < max; idx++ ) \
4991 nnz |= dct1[idx] = !(rand()&3) + (!(rand()&15))*rand(); \
4992 if( !nnz ) \
4993@@ -1464,7 +1471,7 @@ static int check_quant( int cpu_ref, int cpu_new )
4994 x264_run_level_t runlevel_c, runlevel_a; \
4995 int nnz = 0; \
4996 int max = rand() & (w*w-1); \
4997- memset( dct1, 0, w*w*2 ); \
4998+ memset( dct1, 0, w*w*sizeof(dctcoef) ); \
4999 memcpy( &runlevel_a, buf1+i, sizeof(x264_run_level_t) ); \
5000 memcpy( &runlevel_c, buf1+i, sizeof(x264_run_level_t) ); \
5001 for( int idx = ac; idx < max; idx++ ) \
5002@@ -1474,7 +1481,7 @@ static int check_quant( int cpu_ref, int cpu_new )
5003 int result_c = call_c( qf_c.lastname, dct1+ac, &runlevel_c ); \
5004 int result_a = call_a( qf_a.lastname, dct1+ac, &runlevel_a ); \
5005 if( result_c != result_a || runlevel_c.last != runlevel_a.last || \
5006- memcmp(runlevel_c.level, runlevel_a.level, sizeof(int16_t)*result_c) || \
5007+ memcmp(runlevel_c.level, runlevel_a.level, sizeof(dctcoef)*result_c) || \
5008 memcmp(runlevel_c.run, runlevel_a.run, sizeof(uint8_t)*(result_c-1)) ) \
5009 { \
5010 ok = 0; \
5011@@ -1529,11 +1536,11 @@ static int check_intra( int cpu_ref, int cpu_new )
5012 {\
5013 set_func_name( "intra_%s_%s", #name, intra_##name##_names[dir] );\
5014 used_asm = 1;\
5015- memcpy( buf3, buf1, 32*20 * sizeof(pixel) );\
5016- memcpy( buf4, buf1, 32*20 * sizeof(pixel) );\
5017+ memcpy( pbuf3, pbuf1, 32*20 * sizeof(pixel) );\
5018+ memcpy( pbuf4, pbuf1, 32*20 * sizeof(pixel) );\
5019 call_c( ip_c.name[dir], pbuf3+48, ##__VA_ARGS__ );\
5020 call_a( ip_a.name[dir], pbuf4+48, ##__VA_ARGS__ );\
5021- if( memcmp( buf3, buf4, 32*20 * sizeof(pixel) ) )\
5022+ if( memcmp( pbuf3, pbuf4, 32*20 * sizeof(pixel) ) )\
5023 {\
5024 fprintf( stderr, #name "[%d] : [FAILED]\n", dir );\
5025 ok = 0;\
5026@@ -1544,7 +1551,7 @@ static int check_intra( int cpu_ref, int cpu_new )
5027 {\
5028 printf( "%2x ", edge[14-j] );\
5029 for( int k = 0; k < w; k++ )\
5030- printf( "%2x ", buf4[48+k+j*32] );\
5031+ printf( "%2x ", pbuf4[48+k+j*32] );\
5032 printf( "\n" );\
5033 }\
5034 printf( "\n" );\
5035@@ -1552,7 +1559,7 @@ static int check_intra( int cpu_ref, int cpu_new )
5036 {\
5037 printf( " " );\
5038 for( int k = 0; k < w; k++ )\
5039- printf( "%2x ", buf3[48+k+j*32] );\
5040+ printf( "%2x ", pbuf3[48+k+j*32] );\
5041 printf( "\n" );\
5042 }\
5043 }\
5044@@ -1831,8 +1838,9 @@ int main(int argc, char *argv[])
5045 fprintf( stderr, "x264: using random seed %u\n", seed );
5046 srand( seed );
5047
5048- buf1 = x264_malloc( 0x3e00 + 16*BENCH_ALIGNS );
5049- if( !buf1 )
5050+ buf1 = x264_malloc( 0x1e00 + 0x2000*sizeof(pixel) + 16*BENCH_ALIGNS );
5051+ pbuf1 = x264_malloc( 0x1e00*sizeof(pixel) + 16*BENCH_ALIGNS );
5052+ if( !buf1 || !pbuf1 )
5053 {
5054 fprintf( stderr, "malloc failed, unable to initiate tests!\n" );
5055 return -1;
5056@@ -1840,15 +1848,17 @@ int main(int argc, char *argv[])
5057 #define INIT_POINTER_OFFSETS\
5058 buf2 = buf1 + 0xf00;\
5059 buf3 = buf2 + 0xf00;\
5060- buf4 = buf3 + 0x1000;\
5061- pbuf1 = (pixel*)buf1;\
5062- pbuf2 = (pixel*)buf2;\
5063+ buf4 = buf3 + 0x1000*sizeof(pixel);\
5064+ pbuf2 = pbuf1 + 0xf00;\
5065 pbuf3 = (pixel*)buf3;\
5066 pbuf4 = (pixel*)buf4;
5067 INIT_POINTER_OFFSETS;
5068 for( int i = 0; i < 0x1e00; i++ )
5069+ {
5070 buf1[i] = rand() & 0xFF;
5071- memset( buf1+0x1e00, 0, 0x2000 );
5072+ pbuf1[i] = rand() & PIXEL_MAX;
5073+ }
5074+ memset( buf1+0x1e00, 0, 0x2000*sizeof(pixel) );
5075
5076 /* 16-byte alignment is guaranteed whenever it's useful, but some functions also vary in speed depending on %64 */
5077 if( do_bench )
5078@@ -1857,6 +1867,7 @@ int main(int argc, char *argv[])
5079 INIT_POINTER_OFFSETS;
5080 ret |= x264_stack_pagealign( check_all_flags, i*16 );
5081 buf1 += 16;
5082+ pbuf1 += 16;
5083 quiet = 1;
5084 fprintf( stderr, "%d/%d\r", i+1, BENCH_ALIGNS );
5085 }
5086diff --git a/x264.c b/x264.c
5087index 0bede93..c1141ab 100644
5088--- a/x264.c
5089+++ b/x264.c
5090@@ -262,6 +262,7 @@ static void Help( x264_param_t *defaults, int longhelp )
5091 " .mkv -> Matroska\n"
5092 " .flv -> Flash Video\n"
5093 " .mp4 -> MP4 if compiled with GPAC support (%s)\n"
5094+ "Output bit depth: %d (configured at compile time)\n"
5095 "\n"
5096 "Options:\n"
5097 "\n"
5098@@ -286,10 +287,11 @@ static void Help( x264_param_t *defaults, int longhelp )
5099 "no",
5100 #endif
5101 #if HAVE_GPAC
5102- "yes"
5103+ "yes",
5104 #else
5105- "no"
5106+ "no",
5107 #endif
5108+ BIT_DEPTH
5109 );
5110 H0( "Example usage:\n" );
5111 H0( "\n" );
5112@@ -311,8 +313,8 @@ static void Help( x264_param_t *defaults, int longhelp )
5113 H0( "\n" );
5114 H0( "Presets:\n" );
5115 H0( "\n" );
5116- H0( " --profile Force the limits of an H.264 profile [high]\n"
5117- " Overrides all settings.\n" );
5118+ H0( " --profile Force the limits of an H.264 profile [%s]\n"
5119+ " Overrides all settings.\n", BIT_DEPTH > 8 ? "high10" : "high" );
5120 H2( " - baseline:\n"
5121 " --no-8x8dct --bframes 0 --no-cabac\n"
5122 " --cqm flat --weightp 0\n"
5123@@ -322,8 +324,11 @@ static void Help( x264_param_t *defaults, int longhelp )
5124 " --no-8x8dct --cqm flat\n"
5125 " No lossless.\n"
5126 " - high:\n"
5127- " No lossless.\n" );
5128- else H0( " - baseline,main,high\n" );
5129+ " No lossless.\n"
5130+ " - high10:\n"
5131+ " No lossless.\n"
5132+ " Support for bit depth 8-10.\n" );
5133+ else H0( " - baseline,main,high,high10\n" );
5134 H0( " --preset Use a preset to select encoding settings [medium]\n"
5135 " Overridden by user settings.\n" );
5136 H2( " - ultrafast:\n"
5137@@ -453,9 +458,9 @@ static void Help( x264_param_t *defaults, int longhelp )
5138 H0( "\n" );
5139 H0( "Ratecontrol:\n" );
5140 H0( "\n" );
5141- H1( " -q, --qp <integer> Force constant QP (0-51, 0=lossless)\n" );
5142+ H1( " -q, --qp <integer> Force constant QP (0-%d, 0=lossless)\n", QP_MAX );
5143 H0( " -B, --bitrate <integer> Set bitrate (kbit/s)\n" );
5144- H0( " --crf <float> Quality-based VBR (0-51, 0=lossless) [%.1f]\n", defaults->rc.f_rf_constant );
5145+ H0( " --crf <float> Quality-based VBR (0-%d, 0=lossless) [%.1f]\n", QP_MAX, defaults->rc.f_rf_constant );
5146 H1( " --rc-lookahead <integer> Number of frames for frametype lookahead [%d]\n", defaults->rc.i_lookahead );
5147 H0( " --vbv-maxrate <integer> Max local bitrate (kbit/s) [%d]\n", defaults->rc.i_vbv_max_bitrate );
5148 H0( " --vbv-bufsize <integer> Set size of the VBV buffer (kbit) [%d]\n", defaults->rc.i_vbv_buffer_size );
5149@@ -1040,6 +1045,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
5150 #else
5151 printf( "using a non-gcc compiler\n" );
5152 #endif
5153+ printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
5154 exit(0);
5155 case OPT_FRAMES:
5156 param->i_frame_total = X264_MAX( atoi( optarg ), 0 );
5157@@ -1318,7 +1324,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
5158 else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
5159 else if( type == 'b' ) pic->i_type = X264_TYPE_B;
5160 else ret = 0;
5161- if( ret != 3 || qp < -1 || qp > 51 )
5162+ if( ret != 3 || qp < -1 || qp > QP_MAX )
5163 {
5164 x264_cli_log( "x264", X264_LOG_ERROR, "can't parse qpfile for frame %d\n", i_frame );
5165 fclose( opt->qpfile );
5166diff --git a/x264.h b/x264.h
5167index 097365a..4d9b9ca 100644
5168--- a/x264.h
5169+++ b/x264.h
5170@@ -344,7 +344,7 @@ typedef struct x264_param_t
5171 {
5172 int i_rc_method; /* X264_RC_* */
5173
5174- int i_qp_constant; /* 0-51 */
5175+ int i_qp_constant; /* 0 to (51 + 6*(BIT_DEPTH-8)) */
5176 int i_qp_min; /* min allowed QP value */
5177 int i_qp_max; /* max allowed QP value */
5178 int i_qp_step; /* max QP step between frames */
5179@@ -550,7 +550,7 @@ void x264_param_apply_fastfirstpass( x264_param_t * );
5180 /* x264_param_apply_profile:
5181 * Applies the restrictions of the given profile.
5182 * Currently available profiles are, from most to least restrictive: */
5183-static const char * const x264_profile_names[] = { "baseline", "main", "high", 0 };
5184+static const char * const x264_profile_names[] = { "baseline", "main", "high", "high10", 0 };
5185
5186 /* (can be NULL, in which case the function will do nothing)
5187 *
5188--
51891.7.1