· 8 years ago · Jun 17, 2017, 09:32 PM
1From 5f104e9957cc4b69f7197fecf93648a0e2ae0e59 Mon Sep 17 00:00:00 2001
2From: Anton Mitrofanov <BugMaster@narod.ru>
3Date: Mon, 20 Sep 2010 13:10:13 +0400
4Subject: [PATCH 1/8] Fix DTS/bitrate calculation if the first PTS wasn't zero
5 Fix bitrate calculation with DTS compression.
6
7---
8 common/common.h | 1 +
9 encoder/encoder.c | 11 +++++++----
10 x264.c | 10 ++++++----
11 x264.h | 2 +-
12 4 files changed, 15 insertions(+), 9 deletions(-)
13
14diff --git a/common/common.h b/common/common.h
15index efb25be..132cfee 100644
16--- a/common/common.h
17+++ b/common/common.h
18@@ -499,6 +499,7 @@ struct x264_t
19 int i_delay; /* Number of frames buffered for B reordering */
20 int i_bframe_delay;
21 int64_t i_bframe_delay_time;
22+ int64_t i_first_pts;
23 int64_t i_init_delta;
24 int64_t i_prev_reordered_pts[2];
25 int64_t i_largest_pts;
26diff --git a/encoder/encoder.c b/encoder/encoder.c
27index fa4401b..2b679a0 100644
28--- a/encoder/encoder.c
29+++ b/encoder/encoder.c
30@@ -2329,8 +2329,10 @@ int x264_encoder_encode( x264_t *h,
31
32 fenc->i_frame = h->frames.i_input++;
33
34+ if( fenc->i_frame == 0 )
35+ h->frames.i_first_pts = fenc->i_pts;
36 if( h->frames.i_bframe_delay && fenc->i_frame == h->frames.i_bframe_delay )
37- h->frames.i_bframe_delay_time = fenc->i_pts;
38+ h->frames.i_bframe_delay_time = fenc->i_pts - h->frames.i_first_pts;
39
40 if( h->param.b_vfr_input && fenc->i_pts <= h->frames.i_largest_pts )
41 x264_log( h, X264_LOG_WARNING, "non-strictly-monotonic PTS\n" );
42@@ -2495,8 +2497,8 @@ int x264_encoder_encode( x264_t *h,
43 {
44 /* DTS compression */
45 if( h->i_frame == 1 )
46- thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
47- h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
48+ thread_current->frames.i_init_delta = (h->fenc->i_reordered_pts - h->frames.i_first_pts) * h->i_dts_compress_multiplier;
49+ h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier + h->frames.i_first_pts * h->i_dts_compress_multiplier;
50 }
51 }
52 else
53@@ -3110,7 +3112,8 @@ void x264_encoder_close ( x264_t *h )
54 f_bitrate = fps * SUM3(h->stat.i_frame_size) / i_count / 125;
55 else
56 {
57- float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts) * h->param.i_timebase_num / h->param.i_timebase_den;
58+ float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts - h->frames.i_first_pts)
59+ * h->i_dts_compress_multiplier * h->param.i_timebase_num / h->param.i_timebase_den;
60 f_bitrate = SUM3(h->stat.i_frame_size) / duration / 125;
61 }
62
63diff --git a/x264.c b/x264.c
64index a1e7147..f74f096 100644
65--- a/x264.c
66+++ b/x264.c
67@@ -1584,7 +1584,7 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
68 int64_t second_largest_pts = -1;
69 int64_t ticks_per_frame;
70 double duration;
71- int prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
72+ int prev_timebase_den;
73 int dts_compress_multiplier;
74 double pulldown_pts = 0;
75
76@@ -1603,6 +1603,8 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
77 param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
78 }
79
80+ prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
81+
82 if( ( h = x264_encoder_open( param ) ) == NULL )
83 {
84 x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
85@@ -1727,6 +1729,8 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
86 if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
87 x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
88
89+ largest_pts *= dts_compress_multiplier;
90+ second_largest_pts *= dts_compress_multiplier;
91 /* duration algorithm fails when only 1 frame is output */
92 if( i_frame_output == 1 )
93 duration = (double)param->i_fps_den / param->i_fps_num;
94@@ -1734,8 +1738,6 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
95 duration = (double)(2 * last_dts - prev_dts - first_dts) * param->i_timebase_num / param->i_timebase_den;
96 else
97 duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
98- if( !(opt->i_pulldown && !param->b_vfr_input) )
99- duration *= dts_compress_multiplier;
100
101 i_end = x264_mdate();
102 /* Erase progress indicator before printing encoding stats. */
103@@ -1754,7 +1756,7 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
104 }
105
106 filter.free( opt->hin );
107- output.close_file( opt->hout, largest_pts * dts_compress_multiplier, second_largest_pts * dts_compress_multiplier );
108+ output.close_file( opt->hout, largest_pts, second_largest_pts );
109
110 if( i_frame_output > 0 )
111 {
112diff --git a/x264.h b/x264.h
113index 8174015..71f5f55 100644
114--- a/x264.h
115+++ b/x264.h
116@@ -653,7 +653,7 @@ typedef struct
117 int b_keyframe;
118 /* In: user pts, Out: pts of encoded picture (user)*/
119 int64_t i_pts;
120- /* Out: frame dts. Since the pts of the first frame is always zero,
121+ /* Out: frame dts. When the pts of the first frame is close to zero,
122 * initial frames may have a negative dts which must be dealt with by any muxer */
123 int64_t i_dts;
124 /* In: custom encoding parameters to be set from this frame forwards
125--
1261.7.2.3
127
128
129From 5cba26f757ec00a7b95656615813e692685ee138 Mon Sep 17 00:00:00 2001
130From: Anton Mitrofanov <BugMaster@narod.ru>
131Date: Sat, 25 Sep 2010 15:55:32 -0700
132Subject: [PATCH 2/8] Fix CFR ratecontrol with timebase != 1/fps
133 Fixes VBV + DTS compression, among other things.
134
135---
136 encoder/encoder.c | 2 +-
137 encoder/slicetype.c | 4 ++--
138 x264.c | 1 +
139 x264.h | 4 +++-
140 4 files changed, 7 insertions(+), 4 deletions(-)
141
142diff --git a/encoder/encoder.c b/encoder/encoder.c
143index 2b679a0..3570776 100644
144--- a/encoder/encoder.c
145+++ b/encoder/encoder.c
146@@ -626,7 +626,7 @@ static int x264_validate_parameters( x264_t *h )
147 h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_MAX( h->param.i_keyint_max, bufsize*fps ) );
148 }
149
150- if( !h->param.i_timebase_num || !h->param.i_timebase_den )
151+ if( !h->param.i_timebase_num || !h->param.i_timebase_den || !(h->param.b_vfr_input || h->param.b_pulldown) )
152 {
153 h->param.i_timebase_num = h->param.i_fps_den;
154 h->param.i_timebase_den = h->param.i_fps_num;
155diff --git a/encoder/slicetype.c b/encoder/slicetype.c
156index 0d87908..d08cf02 100644
157--- a/encoder/slicetype.c
158+++ b/encoder/slicetype.c
159@@ -1260,12 +1260,12 @@ void x264_slicetype_decide( x264_t *h )
160 if( h->param.b_vfr_input )
161 {
162 if( lookahead_size-- > 1 )
163- h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts);
164+ h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts) * h->i_dts_compress_multiplier;
165 else
166 h->lookahead->next.list[i]->i_duration = h->i_prev_duration;
167 }
168 else
169- h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct];
170+ h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct] * h->i_dts_compress_multiplier;
171 h->i_prev_duration = h->lookahead->next.list[i]->i_duration;
172
173 if( h->lookahead->next.list[i]->i_frame > h->i_disp_fields_last_frame && lookahead_size > 0 )
174diff --git a/x264.c b/x264.c
175index f74f096..5bd2af7 100644
176--- a/x264.c
177+++ b/x264.c
178@@ -1595,6 +1595,7 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
179 /* set up pulldown */
180 if( opt->i_pulldown && !param->b_vfr_input )
181 {
182+ param->b_pulldown = 1;
183 param->b_pic_struct = 1;
184 pulldown = &pulldown_values[opt->i_pulldown];
185 param->i_timebase_num = param->i_fps_den;
186diff --git a/x264.h b/x264.h
187index 71f5f55..56d424c 100644
188--- a/x264.h
189+++ b/x264.h
190@@ -383,7 +383,9 @@ typedef struct x264_param_t
191 int b_annexb; /* if set, place start codes (4 bytes) before NAL units,
192 * otherwise place size (4 bytes) before NAL units. */
193 int i_sps_id; /* SPS and PPS id number */
194- int b_vfr_input; /* VFR input */
195+ int b_vfr_input; /* VFR input. If 1, use timebase and timestamps for ratecontrol purposes.
196+ * If 0, use fps only. */
197+ int b_pulldown; /* use explicitly set timebase for CFR */
198 uint32_t i_fps_num;
199 uint32_t i_fps_den;
200 uint32_t i_timebase_num; /* Timebase numerator */
201--
2021.7.2.3
203
204
205From 54073becc7cfc3a1b574d954d1017cd58cbe8b2a Mon Sep 17 00:00:00 2001
206From: Jason Garrett-Glaser <darkshikari@gmail.com>
207Date: Mon, 27 Sep 2010 05:39:13 -0700
208Subject: [PATCH 3/8] Add missing emms for dump-yuv
209
210---
211 encoder/encoder.c | 3 +++
212 1 files changed, 3 insertions(+), 0 deletions(-)
213
214diff --git a/encoder/encoder.c b/encoder/encoder.c
215index 3570776..7f3d5bc 100644
216--- a/encoder/encoder.c
217+++ b/encoder/encoder.c
218@@ -2934,7 +2934,10 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
219 }
220
221 if( h->param.psz_dump_yuv )
222+ {
223 x264_frame_dump( h );
224+ x264_emms();
225+ }
226
227 return frame_size;
228 }
229--
2301.7.2.3
231
232
233From 991166c935516f19f65485ebb447f079aca41e0e Mon Sep 17 00:00:00 2001
234From: Jason Garrett-Glaser <darkshikari@gmail.com>
235Date: Tue, 21 Sep 2010 17:11:00 -0700
236Subject: [PATCH 4/8] Make slice-max-size more aggressive in considering escape bytes
237 The x264 assumption of randomly distributed escape bytes fails in the case of CABAC + an enormous number of identical macroblocks.
238 This patch attempts to compensate for this.
239 It is probably safe to assume in calling applications that x264 practically never violates the slice size limitation.
240
241---
242 encoder/encoder.c | 63 +++++++++++++++++++++++++++++++++-------------------
243 1 files changed, 40 insertions(+), 23 deletions(-)
244
245diff --git a/encoder/encoder.c b/encoder/encoder.c
246index 7f3d5bc..b9e66ac 100644
247--- a/encoder/encoder.c
248+++ b/encoder/encoder.c
249@@ -1834,10 +1834,12 @@ static int x264_slice_write( x264_t *h )
250 uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */
251 int mv_bits_bak = 0;
252 int tex_bits_bak = 0;
253- /* Assume no more than 3 bytes of NALU escaping.
254- * NALUs other than the first use a 3-byte startcode. */
255- int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 3;
256- int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : INT_MAX;
257+ /* NALUs other than the first use a 3-byte startcode.
258+ * Add one extra byte for the rbsp, and one more for the final CABAC putbyte.
259+ * Then add an extra 5 bytes just in case, to account for random NAL escapes and
260+ * other inaccuracies. */
261+ int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
262+ int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
263 int starting_bits = bs_pos(&h->out.bs);
264 int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
265 int b_hpel = h->fdec->b_kept_as_ref;
266@@ -1884,7 +1886,7 @@ static int x264_slice_write( x264_t *h )
267 if( x264_bitstream_check_buffer( h ) )
268 return -1;
269
270- if( h->param.i_slice_max_size > 0 )
271+ if( slice_max_size )
272 {
273 mv_bits_bak = h->stat.frame.i_mv_bits;
274 tex_bits_bak = h->stat.frame.i_tex_bits;
275@@ -1948,35 +1950,50 @@ static int x264_slice_write( x264_t *h )
276 int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
277 int mb_size = total_bits - mb_spos;
278
279- /* We'll just re-encode this last macroblock if we go over the max slice size. */
280- if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
281- {
282- if( mb_xy != h->sh.i_first_mb )
283+ if( slice_max_size )
284+ {
285+ /* Count the skip run, just in case. */
286+ if( !h->param.b_cabac )
287+ total_bits += bs_size_ue_big( i_skip );
288+ /* HACK: we assume no more than 3 bytes of NALU escaping, but
289+ * this can fail in CABAC streams with an extremely large number of identical
290+ * blocks in sequence (e.g. all-black intra blocks).
291+ * Thus, every 64 blocks, pretend we've used a byte.
292+ * For reference, a sequence of identical empty-CBP i16x16 blocks will use
293+ * one byte after 26 macroblocks, assuming a perfectly adapted CABAC.
294+ * That's 78 macroblocks to generate the 3-byte sequence to trigger an escape. */
295+ else if( ((mb_xy - h->sh.i_first_mb) & 63) == 63 )
296+ slice_max_size -= 8;
297+ /* We'll just re-encode this last macroblock if we go over the max slice size. */
298+ if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
299 {
300- h->stat.frame.i_mv_bits = mv_bits_bak;
301- h->stat.frame.i_tex_bits = tex_bits_bak;
302- if( h->param.b_cabac )
303+ if( mb_xy != h->sh.i_first_mb )
304 {
305- memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
306- h->cabac.p[-1] = cabac_prevbyte_bak;
307+ h->stat.frame.i_mv_bits = mv_bits_bak;
308+ h->stat.frame.i_tex_bits = tex_bits_bak;
309+ if( h->param.b_cabac )
310+ {
311+ memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
312+ h->cabac.p[-1] = cabac_prevbyte_bak;
313+ }
314+ else
315+ {
316+ h->out.bs = bs_bak;
317+ i_skip = i_skip_bak;
318+ }
319+ h->mb.b_reencode_mb = 1;
320+ h->sh.i_last_mb = mb_xy-1;
321+ break;
322 }
323 else
324 {
325- h->out.bs = bs_bak;
326- i_skip = i_skip_bak;
327+ h->sh.i_last_mb = mb_xy;
328+ h->mb.b_reencode_mb = 0;
329 }
330- h->mb.b_reencode_mb = 1;
331- h->sh.i_last_mb = mb_xy-1;
332- break;
333 }
334 else
335- {
336- h->sh.i_last_mb = mb_xy;
337 h->mb.b_reencode_mb = 0;
338- }
339 }
340- else
341- h->mb.b_reencode_mb = 0;
342
343 #if HAVE_VISUALIZE
344 if( h->param.b_visualize )
345--
3461.7.2.3
347
348
349From 3f15d8ea707d4985a38059fe58ce0a0993ceeb94 Mon Sep 17 00:00:00 2001
350From: Jason Garrett-Glaser <darkshikari@gmail.com>
351Date: Mon, 27 Sep 2010 05:39:02 -0700
352Subject: [PATCH 5/8] Various cosmetics
353
354---
355 encoder/encoder.c | 12 ++++++------
356 encoder/set.c | 17 ++---------------
357 2 files changed, 8 insertions(+), 21 deletions(-)
358
359diff --git a/encoder/encoder.c b/encoder/encoder.c
360index b9e66ac..28ded05 100644
361--- a/encoder/encoder.c
362+++ b/encoder/encoder.c
363@@ -2943,12 +2943,12 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
364
365 /* Remove duplicates, must be done near the end as breaks h->fref0 array
366 * by freeing some of its pointers. */
367- for( int i = 0; i < h->i_ref0; i++ )
368- if( h->fref0[i] && h->fref0[i]->b_duplicate )
369- {
370- x264_frame_push_blank_unused( h, h->fref0[i] );
371- h->fref0[i] = 0;
372- }
373+ for( int i = 0; i < h->i_ref0; i++ )
374+ if( h->fref0[i] && h->fref0[i]->b_duplicate )
375+ {
376+ x264_frame_push_blank_unused( h, h->fref0[i] );
377+ h->fref0[i] = 0;
378+ }
379
380 if( h->param.psz_dump_yuv )
381 {
382diff --git a/encoder/set.c b/encoder/set.c
383index b0d2149..a003012 100644
384--- a/encoder/set.c
385+++ b/encoder/set.c
386@@ -294,34 +294,21 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
387 }
388 else if( sps->i_poc_type == 1 )
389 {
390- int i;
391-
392 bs_write( s, 1, sps->b_delta_pic_order_always_zero );
393 bs_write_se( s, sps->i_offset_for_non_ref_pic );
394 bs_write_se( s, sps->i_offset_for_top_to_bottom_field );
395 bs_write_ue( s, sps->i_num_ref_frames_in_poc_cycle );
396
397- for( i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
398- {
399+ for( int i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
400 bs_write_se( s, sps->i_offset_for_ref_frame[i] );
401- }
402 }
403 bs_write_ue( s, sps->i_num_ref_frames );
404 bs_write( s, 1, sps->b_gaps_in_frame_num_value_allowed );
405 bs_write_ue( s, sps->i_mb_width - 1 );
406- if (sps->b_frame_mbs_only)
407- {
408- bs_write_ue( s, sps->i_mb_height - 1);
409- }
410- else // interlaced
411- {
412- bs_write_ue( s, sps->i_mb_height/2 - 1);
413- }
414+ bs_write_ue( s, (sps->i_mb_height >> !sps->b_frame_mbs_only) - 1);
415 bs_write( s, 1, sps->b_frame_mbs_only );
416 if( !sps->b_frame_mbs_only )
417- {
418 bs_write( s, 1, sps->b_mb_adaptive_frame_field );
419- }
420 bs_write( s, 1, sps->b_direct8x8_inference );
421
422 bs_write( s, 1, sps->b_crop );
423--
4241.7.2.3
425
426
427From 377efcd2643ba657a6d26c4599a9cc4022ca84e8 Mon Sep 17 00:00:00 2001
428From: Alex Wright <alexw0885@gmail.com>
429Date: Sun, 19 Sep 2010 05:08:22 -0700
430Subject: [PATCH 6/8] Chroma mode decision/subpel for B-frames
431 Improves compression ~0.4-1%. Helps more on videos with lots of chroma detail.
432 Enabled at subme 9 (preset slower) and higher.
433
434---
435 common/macroblock.c | 5 +-
436 encoder/analyse.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++++--
437 2 files changed, 117 insertions(+), 6 deletions(-)
438
439diff --git a/common/macroblock.c b/common/macroblock.c
440index b6c91d6..7f0348e 100644
441--- a/common/macroblock.c
442+++ b/common/macroblock.c
443@@ -448,8 +448,9 @@ void x264_macroblock_thread_init( x264_t *h )
444 h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
445 if( h->sh.i_type == SLICE_TYPE_B && (h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8) )
446 h->mb.i_subpel_refine--;
447- h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
448- && h->mb.i_subpel_refine >= 5;
449+ h->mb.b_chroma_me = h->param.analyse.b_chroma_me &&
450+ ((h->sh.i_type == SLICE_TYPE_P && h->mb.i_subpel_refine >= 5) ||
451+ (h->sh.i_type == SLICE_TYPE_B && h->mb.i_subpel_refine >= 9));
452 h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B ||
453 (h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);
454
455diff --git a/encoder/analyse.c b/encoder/analyse.c
456index c4162e9..6ed13ba 100644
457--- a/encoder/analyse.c
458+++ b/encoder/analyse.c
459@@ -1679,6 +1679,37 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
460 a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
461 }
462
463+static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
464+{
465+ ALIGNED_ARRAY_8( pixel, pix, [4],[8*8] );
466+ ALIGNED_ARRAY_8( pixel, bi, [2],[8*8] );
467+ int l0_mvy_offset, l1_mvy_offset;
468+ int i_chroma_cost = 0;
469+
470+#define COST_BI_CHROMA( m0, m1, width, height ) \
471+{ \
472+ l0_mvy_offset = h->mb.b_interlaced & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
473+ l1_mvy_offset = h->mb.b_interlaced & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
474+ h->mc.mc_chroma( pix[0], pix[1], 8, m0.p_fref[4], m0.i_stride[1], m0.mv[0], m0.mv[1] + l0_mvy_offset, width, height ); \
475+ h->mc.mc_chroma( pix[2], pix[3], 8, m1.p_fref[4], m1.i_stride[1], m1.mv[0], m1.mv[1] + l1_mvy_offset, width, height ); \
476+ h->mc.avg[i_pixel+3]( bi[0], 8, pix[0], 8, pix[2], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
477+ h->mc.avg[i_pixel+3]( bi[1], 8, pix[1], 8, pix[3], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
478+ i_chroma_cost = h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[1], FENC_STRIDE, bi[0], 8 ); \
479+ i_chroma_cost += h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[2], FENC_STRIDE, bi[1], 8 ); \
480+}
481+
482+ if( i_pixel == PIXEL_16x16 )
483+ COST_BI_CHROMA( a->l0.bi16x16, a->l1.bi16x16, 8, 8 )
484+ else if( i_pixel == PIXEL_16x8 )
485+ COST_BI_CHROMA( a->l0.me16x8[idx], a->l1.me16x8[idx], 8, 4 )
486+ else if( i_pixel == PIXEL_8x16 )
487+ COST_BI_CHROMA( a->l0.me8x16[idx], a->l1.me8x16[idx], 4, 8 )
488+ else
489+ COST_BI_CHROMA( a->l0.me8x8[idx], a->l1.me8x8[idx], 4, 4 )
490+
491+ return i_chroma_cost;
492+}
493+
494 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
495 {
496 /* Assumes that fdec still contains the results of
497@@ -1693,15 +1724,29 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
498 {
499 const int x = (i&1)*8;
500 const int y = (i>>1)*8;
501- a->i_cost16x16direct +=
502- a->i_cost8x8direct[i] =
503- h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE, &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
504+ a->i_cost8x8direct[i] = h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE,
505+ &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
506+ if( h->mb.b_chroma_me )
507+ {
508+ a->i_cost8x8direct[i] += h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
509+ &h->mb.pic.p_fdec[1][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE )
510+ + h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
511+ &h->mb.pic.p_fdec[2][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE );
512+ }
513+ a->i_cost16x16direct += a->i_cost8x8direct[i];
514
515 /* mb type cost */
516 a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
517 }
518 else
519+ {
520 a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_16x16]( p_fenc, FENC_STRIDE, p_fdec, FDEC_STRIDE );
521+ if( h->mb.b_chroma_me )
522+ {
523+ a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE )
524+ + h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE );
525+ }
526+ }
527 }
528
529 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
530@@ -1807,6 +1852,9 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
531 + a->l0.bi16x16.cost_mv
532 + a->l1.bi16x16.cost_mv;
533
534+ if( h->mb.b_chroma_me )
535+ a->i_cost16x16bi += x264_analyse_bi_chroma( h, a, 0, PIXEL_16x16 );
536+
537 /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
538 if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
539 {
540@@ -1819,6 +1867,39 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
541 h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
542 int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
543 + ref_costs + l0_mv_cost + l1_mv_cost;
544+
545+ if( h->mb.b_chroma_me )
546+ {
547+ ALIGNED_ARRAY_8( pixel, pixuv, [2],[8*FENC_STRIDE] );
548+ ALIGNED_ARRAY_8( pixel, bi, [8*FENC_STRIDE] );
549+
550+ if( h->mb.b_interlaced & a->l0.bi16x16.i_ref )
551+ {
552+ int l0_mvy_offset = h->mb.b_interlaced & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
553+ h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
554+ h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
555+ }
556+ else
557+ h->mc.load_deinterleave_8x8x2_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
558+
559+ if( h->mb.b_interlaced & a->l1.bi16x16.i_ref )
560+ {
561+ int l1_mvy_offset = h->mb.b_interlaced & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
562+ h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
563+ h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
564+ }
565+ else
566+ h->mc.load_deinterleave_8x8x2_fenc( pixuv[1], h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
567+
568+ h->mc.avg[PIXEL_8x8]( bi, FENC_STRIDE, pixuv[0], FENC_STRIDE, pixuv[1], FENC_STRIDE,
569+ h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
570+ h->mc.avg[PIXEL_8x8]( bi+8, FENC_STRIDE, pixuv[0]+8, FENC_STRIDE, pixuv[1]+8, FENC_STRIDE,
571+ h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
572+
573+ cost00 += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE )
574+ + h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi+8, FENC_STRIDE );
575+ }
576+
577 if( cost00 < a->i_cost16x16bi )
578 {
579 M32( a->l0.bi16x16.mv ) = 0;
580@@ -2017,6 +2098,13 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
581 + a->l0.me8x8[i].i_ref_cost + a->l1.me8x8[i].i_ref_cost
582 + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
583
584+ if( h->mb.b_chroma_me )
585+ {
586+ int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
587+ i_part_cost_bi += i_chroma_cost;
588+ a->i_satd8x8[2][i] += i_chroma_cost;
589+ }
590+
591 a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
592 a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
593
594@@ -2090,6 +2178,13 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
595 a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
596 a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
597
598+ if( h->mb.b_chroma_me )
599+ {
600+ int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
601+ i_part_cost_bi += i_chroma_cost;
602+ a->i_satd8x8[2][i] += i_chroma_cost;
603+ }
604+
605 i_part_cost = a->l0.me8x8[i].cost;
606 h->mb.i_sub_partition[i] = D_L0_8x8;
607 COPY2_IF_LT( i_part_cost, a->l1.me8x8[i].cost, h->mb.i_sub_partition[i], D_L1_8x8 );
608@@ -2162,6 +2257,9 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
609 + a->l0.me16x8[i].cost_mv + a->l1.me16x8[i].cost_mv + a->l0.me16x8[i].i_ref_cost
610 + a->l1.me16x8[i].i_ref_cost;
611
612+ if( h->mb.b_chroma_me )
613+ i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_16x8 );
614+
615 i_part_cost = a->l0.me16x8[i].cost;
616 a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
617
618@@ -2252,6 +2350,9 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
619 + a->l0.me8x16[i].cost_mv + a->l1.me8x16[i].cost_mv + a->l0.me8x16[i].i_ref_cost
620 + a->l1.me8x16[i].i_ref_cost;
621
622+ if( h->mb.b_chroma_me )
623+ i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_8x16 );
624+
625 i_part_cost = a->l0.me8x16[i].cost;
626 a->i_mb_partition8x16[i] = D_L0_8x8;
627
628@@ -3249,7 +3350,16 @@ intra_analysis:
629 h->mb.i_partition = i_partition;
630 }
631
632- x264_mb_analyse_intra( h, &analysis, i_satd_inter );
633+ if( h->mb.b_chroma_me )
634+ {
635+ x264_mb_analyse_intra_chroma( h, &analysis );
636+ x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_i8x8chroma );
637+ analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
638+ analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
639+ analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
640+ }
641+ else
642+ x264_mb_analyse_intra( h, &analysis, i_satd_inter );
643
644 if( analysis.i_mbrd )
645 {
646--
6471.7.2.3
648
649
650From 594ff9664e7ac57a53fae8d9b5a1ecdf2ba5fa75 Mon Sep 17 00:00:00 2001
651From: Oskar Arvidsson <oskar@irock.se>
652Date: Mon, 27 Sep 2010 16:02:20 +0200
653Subject: [PATCH 7/8] Finish support for high-depth video throughout x264
654 Add support for high depth input in libx264.
655 Add support for 16-bit colorspaces in the filtering system.
656 Add support for input bit depths in the interval [9,16] with the raw demuxer.
657 Add a depth filter to dither input to x264.
658
659---
660 Makefile | 2 +-
661 common/common.c | 17 ++--
662 common/frame.c | 14 +++
663 common/mc.c | 11 +--
664 encoder/encoder.c | 8 +-
665 filters/video/crop.c | 8 +-
666 filters/video/depth.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++
667 filters/video/internal.c | 1 +
668 filters/video/resize.c | 84 +++++++++++++----
669 filters/video/video.c | 1 +
670 input/input.c | 13 ++-
671 input/input.h | 7 +-
672 input/raw.c | 33 ++++++-
673 x264.c | 21 ++++-
674 x264.h | 14 +++-
675 15 files changed, 405 insertions(+), 57 deletions(-)
676 create mode 100644 filters/video/depth.c
677
678diff --git a/Makefile b/Makefile
679index bab55e5..0cd7b82 100644
680--- a/Makefile
681+++ b/Makefile
682@@ -18,7 +18,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
683 output/flv.c output/flv_bytestream.c filters/filters.c \
684 filters/video/video.c filters/video/source.c filters/video/internal.c \
685 filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
686- filters/video/select_every.c filters/video/crop.c
687+ filters/video/select_every.c filters/video/crop.c filters/video/depth.c
688
689 SRCSO =
690
691diff --git a/common/common.c b/common/common.c
692index c329cb0..aff5fc3 100644
693--- a/common/common.c
694+++ b/common/common.c
695@@ -33,6 +33,8 @@
696 #include <malloc.h>
697 #endif
698
699+const int x264_bit_depth = BIT_DEPTH;
700+
701 static void x264_log_default( void *, int, const char *, va_list );
702
703 /****************************************************************************
704@@ -1047,19 +1049,20 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
705 x264_picture_init( pic );
706 pic->img.i_csp = i_csp;
707 pic->img.i_plane = csp == X264_CSP_NV12 ? 2 : 3;
708- pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 );
709+ int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
710+ pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 * depth_factor );
711 if( !pic->img.plane[0] )
712 return -1;
713- pic->img.plane[1] = pic->img.plane[0] + i_width * i_height;
714+ pic->img.plane[1] = pic->img.plane[0] + i_width * i_height * depth_factor;
715 if( csp != X264_CSP_NV12 )
716- pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4;
717- pic->img.i_stride[0] = i_width;
718+ pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4 * depth_factor;
719+ pic->img.i_stride[0] = i_width * depth_factor;
720 if( csp == X264_CSP_NV12 )
721- pic->img.i_stride[1] = i_width;
722+ pic->img.i_stride[1] = i_width * depth_factor;
723 else
724 {
725- pic->img.i_stride[1] = i_width / 2;
726- pic->img.i_stride[2] = i_width / 2;
727+ pic->img.i_stride[1] = i_width / 2 * depth_factor;
728+ pic->img.i_stride[2] = i_width / 2 * depth_factor;
729 }
730 return 0;
731 }
732diff --git a/common/frame.c b/common/frame.c
733index 0c3d77f..95666da 100644
734--- a/common/frame.c
735+++ b/common/frame.c
736@@ -263,6 +263,20 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
737 return -1;
738 }
739
740+#if X264_HIGH_BIT_DEPTH
741+ if( !(src->img.i_csp & X264_CSP_HIGH_DEPTH) )
742+ {
743+ x264_log( h, X264_LOG_ERROR, "This build of x264 requires high depth input. Rebuild to support 8-bit input.\n" );
744+ return -1;
745+ }
746+#else
747+ if( src->img.i_csp & X264_CSP_HIGH_DEPTH )
748+ {
749+ x264_log( h, X264_LOG_ERROR, "This build of x264 requires 8-bit input. Rebuild to support high depth input.\n" );
750+ return -1;
751+ }
752+#endif
753+
754 dst->i_type = src->i_type;
755 dst->i_qpplus1 = src->i_qpplus1;
756 dst->i_pts = dst->i_reordered_pts = src->i_pts;
757diff --git a/common/mc.c b/common/mc.c
758index 5b58a76..acc2312 100644
759--- a/common/mc.c
760+++ b/common/mc.c
761@@ -302,12 +302,7 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
762 {
763 while( h-- )
764 {
765-#if X264_HIGH_BIT_DEPTH
766- for( int i = 0; i < w; i++ )
767- dst[i] = src[i] << (BIT_DEPTH-8);
768-#else
769- memcpy( dst, src, w );
770-#endif
771+ memcpy( dst, src, w * sizeof(pixel) );
772 dst += i_dst;
773 src += i_src;
774 }
775@@ -320,8 +315,8 @@ void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
776 for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
777 for( int x=0; x<w; x++ )
778 {
779- dst[2*x] = srcu[x] << (BIT_DEPTH-8);
780- dst[2*x+1] = srcv[x] << (BIT_DEPTH-8);
781+ dst[2*x] = ((pixel*)srcu)[x];
782+ dst[2*x+1] = ((pixel*)srcv)[x];
783 }
784 }
785
786diff --git a/encoder/encoder.c b/encoder/encoder.c
787index 28ded05..62a4350 100644
788--- a/encoder/encoder.c
789+++ b/encoder/encoder.c
790@@ -2777,12 +2777,14 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
791 x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );
792
793 pic_out->img.i_csp = X264_CSP_NV12;
794+#if X264_HIGH_BIT_DEPTH
795+ pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH;
796+#endif
797 pic_out->img.i_plane = h->fdec->i_plane;
798 for( int i = 0; i < 2; i++ )
799 {
800- pic_out->img.i_stride[i] = h->fdec->i_stride[i];
801- // FIXME This breaks the API when pixel != uint8_t.
802- pic_out->img.plane[i] = h->fdec->plane[i];
803+ pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel);
804+ pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i];
805 }
806
807 x264_frame_push_unused( thread_current, h->fenc );
808diff --git a/filters/video/crop.c b/filters/video/crop.c
809index 2a3c2b1..b70476e 100644
810--- a/filters/video/crop.c
811+++ b/filters/video/crop.c
812@@ -103,8 +103,12 @@ static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
813 output->img.height = h->dims[3];
814 /* shift the plane pointers down 'top' rows and right 'left' columns. */
815 for( int i = 0; i < output->img.planes; i++ )
816- output->img.plane[i] += (int)(output->img.stride[i] * h->dims[1] * h->csp->height[i]
817- + h->dims[0] * h->csp->width[i]);
818+ {
819+ intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
820+ offset += h->dims[0] * h->csp->width[i];
821+ offset *= x264_cli_csp_depth_factor( output->img.csp );
822+ output->img.plane[i] += offset;
823+ }
824 return 0;
825 }
826
827diff --git a/filters/video/depth.c b/filters/video/depth.c
828new file mode 100644
829index 0000000..a0411c5
830--- /dev/null
831+++ b/filters/video/depth.c
832@@ -0,0 +1,228 @@
833+/*****************************************************************************
834+ * depth.c: x264 video depth filter
835+ *****************************************************************************
836+ * Copyright (C) 2010 Oskar Arvidsson <oskar@irock.se>
837+ *
838+ * This program is free software; you can redistribute it and/or modify
839+ * it under the terms of the GNU General Public License as published by
840+ * the Free Software Foundation; either version 2 of the License, or
841+ * (at your option) any later version.
842+ *
843+ * This program is distributed in the hope that it will be useful,
844+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
845+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
846+ * GNU General Public License for more details.
847+ *
848+ * You should have received a copy of the GNU General Public License
849+ * along with this program; if not, write to the Free Software
850+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
851+ *****************************************************************************/
852+
853+#include "video.h"
854+#define NAME "depth"
855+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
856+
857+cli_vid_filter_t depth_filter;
858+
859+typedef struct
860+{
861+ hnd_t prev_hnd;
862+ cli_vid_filter_t prev_filter;
863+
864+ int bit_depth;
865+ int dst_csp;
866+ cli_pic_t buffer;
867+ int16_t *error_buf;
868+} depth_hnd_t;
869+
870+static int depth_filter_csp_is_supported( int csp )
871+{
872+ int csp_mask = csp & X264_CSP_MASK;
873+ return csp_mask == X264_CSP_I420 ||
874+ csp_mask == X264_CSP_I422 ||
875+ csp_mask == X264_CSP_I444 ||
876+ csp_mask == X264_CSP_YV12 ||
877+ csp_mask == X264_CSP_NV12;
878+}
879+
880+static int csp_num_interleaved( int csp, int plane )
881+{
882+ int csp_mask = csp & X264_CSP_MASK;
883+ return ( csp_mask == X264_CSP_NV12 && plane == 1 ) ? 2 : 1;
884+}
885+
886+/* The dithering algorithm is based on Sierra-2-4A error diffusion. It has been
887+ * written in such a way that, if the source has been upconverted using the
888+ * same algorithm as used in scale_image, dithering down to the source bit
889+ * depth again is lossless. */
890+#define DITHER_PLANE( pitch ) \
891+static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \
892+ int width, int height, int16_t *errors ) \
893+{ \
894+ const int lshift = 16-BIT_DEPTH; \
895+ const int rshift = 2*BIT_DEPTH-16; \
896+ const int pixel_max = (1 << BIT_DEPTH)-1; \
897+ const int half = 1 << (16-BIT_DEPTH); \
898+ memset( errors, 0, (width+1) * sizeof(int16_t) ); \
899+ for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \
900+ { \
901+ int err = 0; \
902+ for( int x = 0; x < width; x++ ) \
903+ { \
904+ err += errors[x] + errors[x+1]; \
905+ dst[x*pitch] = x264_clip3( (((src[x*pitch]+half)<<2)+err)*pixel_max >> 18, 0, pixel_max ); \
906+ errors[x] = src[x*pitch] - (dst[x*pitch] << lshift) - (dst[x*pitch] >> rshift); \
907+ err = errors[x] << 1; \
908+ } \
909+ } \
910+}
911+
912+DITHER_PLANE( 1 )
913+DITHER_PLANE( 2 )
914+
915+static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf )
916+{
917+ int csp_mask = img->csp & X264_CSP_MASK;
918+ for( int i = 0; i < img->planes; i++ )
919+ {
920+ int num_interleaved = csp_num_interleaved( img->csp, i );
921+ int height = x264_cli_csps[csp_mask].height[i] * img->height;
922+ int width = x264_cli_csps[csp_mask].width[i] * img->width / num_interleaved;
923+
924+#define CALL_DITHER_PLANE( pitch, off ) \
925+ dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/sizeof(pixel), \
926+ ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf )
927+
928+ if( num_interleaved == 1 )
929+ {
930+ CALL_DITHER_PLANE( 1, 0 );
931+ }
932+ else
933+ {
934+ CALL_DITHER_PLANE( 2, 0 );
935+ CALL_DITHER_PLANE( 2, 1 );
936+ }
937+ }
938+}
939+
940+static void scale_image( cli_image_t *output, cli_image_t *img )
941+{
942+ /* This function mimics how swscale does upconversion. 8-bit is converted
943+ * to 16-bit by left shifting the original value by 8 and then adding
944+ * the original value to that. This effectively keeps the full color range
945+ * while also being fast. For n-bit we basically do the same thing, but we
946+ * discard the lower 16-n bits. */
947+ int csp_mask = img->csp & X264_CSP_MASK;
948+ const int shift = 16-BIT_DEPTH;
949+ for( int i = 0; i < img->planes; i++ )
950+ {
951+ uint8_t *src = img->plane[i];
952+ uint16_t *dst = (uint16_t*)output->plane[i];
953+ int height = x264_cli_csps[csp_mask].height[i] * img->height;
954+ int width = x264_cli_csps[csp_mask].width[i] * img->width;
955+
956+ for( int j = 0; j < height; j++ )
957+ {
958+ for( int k = 0; k < width; k++ )
959+ dst[k] = ((src[k] << 8) + src[k]) >> shift;
960+
961+ src += img->stride[i];
962+ dst += output->stride[i]/2;
963+ }
964+ }
965+}
966+
967+static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
968+{
969+ depth_hnd_t *h = handle;
970+
971+ if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
972+ return -1;
973+
974+ if( h->bit_depth < 16 && output->img.csp & X264_CSP_HIGH_DEPTH )
975+ {
976+ dither_image( &h->buffer.img, &output->img, h->error_buf );
977+ output->img = h->buffer.img;
978+ }
979+ else if( h->bit_depth > 8 && !(output->img.csp & X264_CSP_HIGH_DEPTH) )
980+ {
981+ scale_image( &h->buffer.img, &output->img );
982+ output->img = h->buffer.img;
983+ }
984+ return 0;
985+}
986+
987+static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
988+{
989+ depth_hnd_t *h = handle;
990+ return h->prev_filter.release_frame( h->prev_hnd, pic, frame );
991+}
992+
993+static void free_filter( hnd_t handle )
994+{
995+ depth_hnd_t *h = handle;
996+ h->prev_filter.free( h->prev_hnd );
997+ x264_cli_pic_clean( &h->buffer );
998+ x264_free( h );
999+}
1000+
1001+static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
1002+ x264_param_t *param, char *opt_string )
1003+{
1004+ int ret = 0;
1005+ int change_fmt = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH;
1006+ int csp = ~(~info->csp ^ change_fmt);
1007+ int bit_depth = 8*x264_cli_csp_depth_factor( csp );
1008+
1009+ if( opt_string )
1010+ {
1011+ static const char *optlist[] = { "bit_depth", NULL };
1012+ char **opts = x264_split_options( opt_string, optlist );
1013+
1014+ if( opts )
1015+ {
1016+ char *str_bit_depth = x264_get_option( "bit_depth", opts );
1017+ bit_depth = x264_otoi( str_bit_depth, -1 );
1018+
1019+ ret = bit_depth < 8 || bit_depth > 16;
1020+ csp = bit_depth > 8 ? csp | X264_CSP_HIGH_DEPTH : csp & ~X264_CSP_HIGH_DEPTH;
1021+ change_fmt = (info->csp ^ csp) & X264_CSP_HIGH_DEPTH;
1022+ x264_free_string_array( opts );
1023+ }
1024+ else
1025+ ret = 1;
1026+ }
1027+
1028+ FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this build supports only bit depth %d\n", BIT_DEPTH )
1029+ FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" )
1030+
1031+ /* only add the filter to the chain if it's needed */
1032+ if( change_fmt || bit_depth != 8 * x264_cli_csp_depth_factor( csp ) )
1033+ {
1034+ FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" )
1035+ depth_hnd_t *h = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) );
1036+
1037+ if( !h )
1038+ return -1;
1039+
1040+ h->error_buf = (int16_t*)(h + 1);
1041+ h->dst_csp = csp;
1042+ h->bit_depth = bit_depth;
1043+ h->prev_hnd = *handle;
1044+ h->prev_filter = *filter;
1045+
1046+ if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, info->width, info->height ) )
1047+ {
1048+ x264_free( h );
1049+ return -1;
1050+ }
1051+
1052+ *handle = h;
1053+ *filter = depth_filter;
1054+ info->csp = h->dst_csp;
1055+ }
1056+
1057+ return 0;
1058+}
1059+
1060+cli_vid_filter_t depth_filter = { NAME, NULL, init, get_frame, release_frame, free_filter, NULL };
1061diff --git a/filters/video/internal.c b/filters/video/internal.c
1062index 444ea1f..ef096dc 100644
1063--- a/filters/video/internal.c
1064+++ b/filters/video/internal.c
1065@@ -51,6 +51,7 @@ int x264_cli_pic_copy( cli_pic_t *out, cli_pic_t *in )
1066 {
1067 int height = in->img.height * x264_cli_csps[csp].height[i];
1068 int width = in->img.width * x264_cli_csps[csp].width[i];
1069+ width *= x264_cli_csp_depth_factor( in->img.csp );
1070 x264_cli_plane_copy( out->img.plane[i], out->img.stride[i], in->img.plane[i],
1071 in->img.stride[i], width, height );
1072 }
1073diff --git a/filters/video/resize.c b/filters/video/resize.c
1074index 38077b2..04b5e73 100644
1075--- a/filters/video/resize.c
1076+++ b/filters/video/resize.c
1077@@ -79,10 +79,21 @@ static void help( int longhelp )
1078 " - fittobox: resizes the video based on the desired contraints\n"
1079 " - width, height, both\n"
1080 " - fittobox and sar: same as above except with specified sar\n"
1081- " simultaneously converting to the given colorspace\n"
1082- " using resizer method [\"bicubic\"]\n"
1083- " - fastbilinear, bilinear, bicubic, experimental, point,\n"
1084- " - area, bicublin, gauss, sinc, lanczos, spline\n" );
1085+ " - csp: convert to the given csp. syntax: [name][:depth]\n"
1086+ " - valid csp names [keep current]: " );
1087+
1088+ for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
1089+ {
1090+ printf( "%s", x264_cli_csps[i].name );
1091+ if( i+1 < X264_CSP_CLI_MAX )
1092+ printf( ", " );
1093+ }
1094+ printf( "\n"
1095+ " - depth: 8 or 16 bits per pixel [keep current]\n"
1096+ " note: not all depths are supported by all csps.\n"
1097+ " - method: use resizer method [\"bicubic\"]\n"
1098+ " - fastbilinear, bilinear, bicubic, experimental, point,\n"
1099+ " - area, bicublin, gauss, sinc, lanczos, spline\n" );
1100 }
1101
1102 static uint32_t convert_cpu_to_flag( uint32_t cpu )
1103@@ -131,13 +142,15 @@ static int convert_csp_to_pix_fmt( int csp )
1104 return csp&X264_CSP_MASK;
1105 switch( csp&X264_CSP_MASK )
1106 {
1107- case X264_CSP_I420: return PIX_FMT_YUV420P;
1108- case X264_CSP_I422: return PIX_FMT_YUV422P;
1109- case X264_CSP_I444: return PIX_FMT_YUV444P;
1110- case X264_CSP_NV12: return PIX_FMT_NV12;
1111- case X264_CSP_YV12: return PIX_FMT_YUV420P; /* specially handled via swapping chroma */
1112- case X264_CSP_BGR: return PIX_FMT_BGR24;
1113- case X264_CSP_BGRA: return PIX_FMT_BGRA;
1114+ case X264_CSP_YV12: /* specially handled via swapping chroma */
1115+ case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
1116+ case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
1117+ case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
1118+ case X264_CSP_RGB: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48 : PIX_FMT_RGB24;
1119+ /* the next 3 csps have no equivalent 16bit depth in swscale */
1120+ case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE : PIX_FMT_NV12;
1121+ case X264_CSP_BGR: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE : PIX_FMT_BGR24;
1122+ case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE : PIX_FMT_BGRA;
1123 default: return PIX_FMT_NONE;
1124 }
1125 }
1126@@ -147,23 +160,30 @@ static int pick_closest_supported_csp( int csp )
1127 int pix_fmt = convert_csp_to_pix_fmt( csp );
1128 switch( pix_fmt )
1129 {
1130+ case PIX_FMT_YUV420P16LE:
1131+ case PIX_FMT_YUV420P16BE:
1132+ return X264_CSP_I420 | X264_CSP_HIGH_DEPTH;
1133 case PIX_FMT_YUV422P:
1134- case PIX_FMT_YUV422P16LE:
1135- case PIX_FMT_YUV422P16BE:
1136 case PIX_FMT_YUYV422:
1137 case PIX_FMT_UYVY422:
1138 return X264_CSP_I422;
1139+ case PIX_FMT_YUV422P16LE:
1140+ case PIX_FMT_YUV422P16BE:
1141+ return X264_CSP_I422 | X264_CSP_HIGH_DEPTH;
1142 case PIX_FMT_YUV444P:
1143+ return X264_CSP_I444;
1144 case PIX_FMT_YUV444P16LE:
1145 case PIX_FMT_YUV444P16BE:
1146- return X264_CSP_I444;
1147- case PIX_FMT_RGB24: // convert rgb to bgr
1148- case PIX_FMT_RGB48BE:
1149- case PIX_FMT_RGB48LE:
1150+ return X264_CSP_I444 | X264_CSP_HIGH_DEPTH;
1151+ case PIX_FMT_RGB24:
1152 case PIX_FMT_RGB565BE:
1153 case PIX_FMT_RGB565LE:
1154 case PIX_FMT_RGB555BE:
1155 case PIX_FMT_RGB555LE:
1156+ return X264_CSP_RGB;
1157+ case PIX_FMT_RGB48BE:
1158+ case PIX_FMT_RGB48LE:
1159+ return X264_CSP_RGB | X264_CSP_HIGH_DEPTH;
1160 case PIX_FMT_BGR24:
1161 case PIX_FMT_BGR565BE:
1162 case PIX_FMT_BGR565LE:
1163@@ -209,12 +229,27 @@ static int handle_opts( const char **optlist, char **opts, video_info_t *info, r
1164
1165 if( str_csp )
1166 {
1167- /* output csp was specified, lookup against valid values */
1168+ /* output csp was specified, first check if optional depth was provided */
1169+ char *str_depth = strchr( str_csp, ':' );
1170+ int depth = x264_cli_csp_depth_factor( info->csp ) * 8;
1171+ if( str_depth )
1172+ {
1173+ /* csp bit depth was specified */
1174+ *str_depth++ = '\0';
1175+ depth = x264_otoi( str_depth, -1 );
1176+ FAIL_IF_ERROR( depth != 8 && depth != 16, "unsupported bit depth %d\n", depth );
1177+ }
1178+ /* now lookup against the list of valid csps */
1179 int csp;
1180- for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
1181- csp--;
1182+ if( strlen( str_csp ) == 0 )
1183+ csp = info->csp & X264_CSP_MASK;
1184+ else
1185+ for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
1186+ csp--;
1187 FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
1188 h->dst_csp = csp;
1189+ if( depth == 16 )
1190+ h->dst_csp |= X264_CSP_HIGH_DEPTH;
1191 }
1192
1193 /* if the input sar is currently invalid, set it to 1:1 so it can be used in math */
1194@@ -366,8 +401,17 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
1195 h->swap_chroma = (info->csp & X264_CSP_MASK) == X264_CSP_YV12;
1196 int src_pix_fmt = convert_csp_to_pix_fmt( info->csp );
1197
1198+ int src_pix_fmt_inv = convert_csp_to_pix_fmt( info->csp ^ X264_CSP_HIGH_DEPTH );
1199+ int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
1200+
1201 /* confirm swscale can support this conversion */
1202+ FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
1203+ "input colorspace %s with bit depth %d is not supported\n", sws_format_name( src_pix_fmt_inv ),
1204+ info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
1205 FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", sws_format_name( src_pix_fmt ) )
1206+ FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
1207+ "output colorspace %s with bit depth %d is not supported\n", sws_format_name( dst_pix_fmt_inv ),
1208+ h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
1209 FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", sws_format_name( h->dst.pix_fmt ) )
1210 FAIL_IF_ERROR( h->dst.height != info->height && info->interlaced,
1211 "swscale is not compatible with interlaced vertical resizing\n" )
1212diff --git a/filters/video/video.c b/filters/video/video.c
1213index 61dc8c6..71ae01e 100644
1214--- a/filters/video/video.c
1215+++ b/filters/video/video.c
1216@@ -51,6 +51,7 @@ void x264_register_vid_filters()
1217 REGISTER_VFILTER( fix_vfr_pts );
1218 REGISTER_VFILTER( resize );
1219 REGISTER_VFILTER( select_every );
1220+ REGISTER_VFILTER( depth );
1221 #if HAVE_GPL
1222 #endif
1223 }
1224diff --git a/input/input.c b/input/input.c
1225index 78c7a88..a14bd3c 100644
1226--- a/input/input.c
1227+++ b/input/input.c
1228@@ -32,7 +32,8 @@ const x264_cli_csp_t x264_cli_csps[] = {
1229 [X264_CSP_YV12] = { "yv12", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
1230 [X264_CSP_NV12] = { "nv12", 2, { 1, 1 }, { 1, .5 }, 2, 2 },
1231 [X264_CSP_BGR] = { "bgr", 1, { 3 }, { 1 }, 1, 1 },
1232- [X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 }
1233+ [X264_CSP_BGRA] = { "bgra", 1, { 4 }, { 1 }, 1, 1 },
1234+ [X264_CSP_RGB] = { "rgb", 1, { 3 }, { 1 }, 1, 1 },
1235 };
1236
1237 int x264_cli_csp_is_invalid( int csp )
1238@@ -41,6 +42,13 @@ int x264_cli_csp_is_invalid( int csp )
1239 return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
1240 }
1241
1242+int x264_cli_csp_depth_factor( int csp )
1243+{
1244+ if( x264_cli_csp_is_invalid( csp ) )
1245+ return 0;
1246+ return (csp & X264_CSP_HIGH_DEPTH) ? 2 : 1;
1247+}
1248+
1249 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
1250 {
1251 int csp_mask = csp & X264_CSP_MASK;
1252@@ -48,6 +56,7 @@ uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
1253 return 0;
1254 uint64_t size = (uint64_t)width * height;
1255 size *= x264_cli_csps[csp_mask].width[plane] * x264_cli_csps[csp_mask].height[plane];
1256+ size *= x264_cli_csp_depth_factor( csp );
1257 return size;
1258 }
1259
1260@@ -78,7 +87,7 @@ int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
1261 pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
1262 if( !pic->img.plane[i] )
1263 return -1;
1264- pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i];
1265+ pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
1266 }
1267
1268 return 0;
1269diff --git a/input/input.h b/input/input.h
1270index 972dd9c..43826d7 100644
1271--- a/input/input.h
1272+++ b/input/input.h
1273@@ -36,6 +36,7 @@ typedef struct
1274 char *index_file;
1275 char *resolution;
1276 char *colorspace;
1277+ int bit_depth;
1278 char *timebase;
1279 int seek;
1280 } cli_input_opt_t;
1281@@ -103,8 +104,9 @@ extern cli_input_t input;
1282 #define X264_CSP_I444 (X264_CSP_MAX+1) /* yuv 4:4:4 planar */
1283 #define X264_CSP_BGR (X264_CSP_MAX+2) /* packed bgr 24bits */
1284 #define X264_CSP_BGRA (X264_CSP_MAX+3) /* packed bgr 32bits */
1285-#define X264_CSP_CLI_MAX (X264_CSP_MAX+4) /* end of list */
1286-#define X264_CSP_OTHER 0x2000 /* non x264 colorspace */
1287+#define X264_CSP_RGB (X264_CSP_MAX+4) /* packed rgb 24bits */
1288+#define X264_CSP_CLI_MAX (X264_CSP_MAX+5) /* end of list */
1289+#define X264_CSP_OTHER 0x4000 /* non x264 colorspace */
1290
1291 typedef struct
1292 {
1293@@ -119,6 +121,7 @@ typedef struct
1294 extern const x264_cli_csp_t x264_cli_csps[];
1295
1296 int x264_cli_csp_is_invalid( int csp );
1297+int x264_cli_csp_depth_factor( int csp );
1298 int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
1299 void x264_cli_pic_clean( cli_pic_t *pic );
1300 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
1301diff --git a/input/raw.c b/input/raw.c
1302index f5fbed6..9617fb1 100644
1303--- a/input/raw.c
1304+++ b/input/raw.c
1305@@ -34,11 +34,12 @@ typedef struct
1306 int next_frame;
1307 uint64_t plane_size[4];
1308 uint64_t frame_size;
1309+ int bit_depth;
1310 } raw_hnd_t;
1311
1312 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
1313 {
1314- raw_hnd_t *h = malloc( sizeof(raw_hnd_t) );
1315+ raw_hnd_t *h = calloc( 1, sizeof(raw_hnd_t) );
1316 if( !h )
1317 return -1;
1318
1319@@ -61,8 +62,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1320 else /* default */
1321 info->csp = X264_CSP_I420;
1322
1323- h->next_frame = 0;
1324- info->vfr = 0;
1325+ h->bit_depth = opt->bit_depth;
1326+ FAIL_IF_ERROR( h->bit_depth < 8 || h->bit_depth > 16, "unsupported bit depth `%d'\n", h->bit_depth );
1327+ if( h->bit_depth > 8 )
1328+ info->csp |= X264_CSP_HIGH_DEPTH;
1329
1330 if( !strcmp( psz_filename, "-" ) )
1331 h->fh = stdin;
1332@@ -73,11 +76,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1333
1334 info->thread_safe = 1;
1335 info->num_frames = 0;
1336- h->frame_size = 0;
1337- for( int i = 0; i < x264_cli_csps[info->csp].planes; i++ )
1338+ info->vfr = 0;
1339+
1340+ const x264_cli_csp_t *csp = x264_cli_get_csp( info->csp );
1341+ for( int i = 0; i < csp->planes; i++ )
1342 {
1343 h->plane_size[i] = x264_cli_pic_plane_size( info->csp, info->width, info->height, i );
1344 h->frame_size += h->plane_size[i];
1345+ /* x264_cli_pic_plane_size returns the size in bytes, we need the value in pixels from here on */
1346+ h->plane_size[i] /= x264_cli_csp_depth_factor( info->csp );
1347 }
1348
1349 if( x264_is_regular_file( h->fh ) )
1350@@ -95,8 +102,22 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1351 static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
1352 {
1353 int error = 0;
1354+ int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
1355 for( int i = 0; i < pic->img.planes && !error; i++ )
1356- error |= fread( pic->img.plane[i], h->plane_size[i], 1, h->fh ) <= 0;
1357+ {
1358+ error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
1359+ if( h->bit_depth & 7 )
1360+ {
1361+ /* upconvert non 16bit high depth planes to 16bit using the same
1362+ * algorithm as used in the depth filter. */
1363+ uint16_t *plane = (uint16_t*)pic->img.plane[i];
1364+ uint64_t pixel_count = h->plane_size[i];
1365+ int lshift = 16 - h->bit_depth;
1366+ int rshift = 2*h->bit_depth - 16;
1367+ for( uint64_t j = 0; j < pixel_count; j++ )
1368+ plane[j] = (plane[j] << lshift) + (plane[j] >> rshift);
1369+ }
1370+ }
1371 return error;
1372 }
1373
1374diff --git a/x264.c b/x264.c
1375index 5bd2af7..415181c 100644
1376--- a/x264.c
1377+++ b/x264.c
1378@@ -214,7 +214,7 @@ static void print_version_info()
1379 #else
1380 printf( "using a non-gcc compiler\n" );
1381 #endif
1382- printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
1383+ printf( "configuration: --bit-depth=%d\n", x264_bit_depth );
1384 printf( "x264 license: " );
1385 #if HAVE_GPL
1386 printf( "GPL version 2 or later\n" );
1387@@ -375,7 +375,7 @@ static void Help( x264_param_t *defaults, int longhelp )
1388 #else
1389 "no",
1390 #endif
1391- BIT_DEPTH
1392+ x264_bit_depth
1393 );
1394 H0( "Example usage:\n" );
1395 H0( "\n" );
1396@@ -697,6 +697,7 @@ static void Help( x264_param_t *defaults, int longhelp )
1397 " - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) );
1398 H1( " --input-csp <string> Specify input colorspace format for raw input\n" );
1399 print_csp_names( longhelp );
1400+ H1( " --input-depth <integer> Specify input bit depth for raw input\n" );
1401 H1( " --input-res <intxint> Specify input resolution (width x height)\n" );
1402 H1( " --index <string> Filename for input index file\n" );
1403 H0( " --sar width:height Specify Sample Aspect Ratio\n" );
1404@@ -769,7 +770,8 @@ enum {
1405 OPT_LOG_LEVEL,
1406 OPT_VIDEO_FILTER,
1407 OPT_INPUT_RES,
1408- OPT_INPUT_CSP
1409+ OPT_INPUT_CSP,
1410+ OPT_INPUT_DEPTH
1411 } OptionsOPT;
1412
1413 static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
1414@@ -921,6 +923,7 @@ static struct option long_options[] =
1415 { "video-filter", required_argument, NULL, OPT_VIDEO_FILTER },
1416 { "input-res", required_argument, NULL, OPT_INPUT_RES },
1417 { "input-csp", required_argument, NULL, OPT_INPUT_CSP },
1418+ { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
1419 {0, 0, 0, 0}
1420 };
1421
1422@@ -1082,10 +1085,16 @@ static int init_vid_filters( char *sequence, hnd_t *handle, video_info_t *info,
1423 if( csp > X264_CSP_NONE && csp < X264_CSP_MAX )
1424 param->i_csp = info->csp;
1425 else
1426- param->i_csp = X264_CSP_I420;
1427+ param->i_csp = X264_CSP_I420 | ( info->csp & X264_CSP_HIGH_DEPTH );
1428 if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) )
1429 return -1;
1430
1431+ char args[20];
1432+ sprintf( args, "bit_depth=%d", x264_bit_depth );
1433+
1434+ if( x264_init_vid_filter( "depth", handle, &filter, info, param, args ) )
1435+ return -1;
1436+
1437 return 0;
1438 }
1439
1440@@ -1138,6 +1147,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1441
1442 memset( opt, 0, sizeof(cli_opt_t) );
1443 memset( &input_opt, 0, sizeof(cli_input_opt_t) );
1444+ input_opt.bit_depth = 8;
1445 opt->b_progress = 1;
1446
1447 /* Presets are applied before all other options. */
1448@@ -1283,6 +1293,9 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1449 case OPT_INPUT_CSP:
1450 input_opt.colorspace = optarg;
1451 break;
1452+ case OPT_INPUT_DEPTH:
1453+ input_opt.bit_depth = atoi( optarg );
1454+ break;
1455 default:
1456 generic_option:
1457 {
1458diff --git a/x264.h b/x264.h
1459index 56d424c..c9b182a 100644
1460--- a/x264.h
1461+++ b/x264.h
1462@@ -180,7 +180,8 @@ static const char * const x264_open_gop_names[] = { "none", "normal", "bluray",
1463 #define X264_CSP_YV12 0x0002 /* yvu 4:2:0 planar */
1464 #define X264_CSP_NV12 0x0003 /* yuv 4:2:0, with one y plane and one packed u+v */
1465 #define X264_CSP_MAX 0x0004 /* end of list */
1466-#define X264_CSP_VFLIP 0x1000 /* */
1467+#define X264_CSP_VFLIP 0x1000 /* the csp is vertically flipped */
1468+#define X264_CSP_HIGH_DEPTH 0x2000 /* the csp has a depth of 16 bits per pixel component */
1469
1470 /* Slice type */
1471 #define X264_TYPE_AUTO 0x0000 /* Let x264 choose the right type */
1472@@ -342,7 +343,7 @@ typedef struct x264_param_t
1473 {
1474 int i_rc_method; /* X264_RC_* */
1475
1476- int i_qp_constant; /* 0 to (51 + 6*(BIT_DEPTH-8)) */
1477+ int i_qp_constant; /* 0 to (51 + 6*(x264_bit_depth-8)) */
1478 int i_qp_min; /* min allowed QP value */
1479 int i_qp_max; /* max allowed QP value */
1480 int i_qp_step; /* max QP step between frames */
1481@@ -566,6 +567,15 @@ int x264_param_apply_profile( x264_param_t *, const char *profile );
1482 * Picture structures and functions
1483 ****************************************************************************/
1484
1485+/* x264_bit_depth:
1486+ * Specifies the number of bits per pixel sample that x264 uses. This is also the
1487+ * bit depth that x264 encodes in. If this value is > 8, x264 will read
1488+ * two bytes of input data for each pixel sample, and expect the upper
1489+ * (16-x264_bit_depth) bits to be zero.
1490+ * Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the
1491+ * colorspace depth as well. */
1492+extern const int x264_bit_depth;
1493+
1494 enum pic_struct_e
1495 {
1496 PIC_STRUCT_AUTO = 0, // automatically decide (default)
1497--
14981.7.2.3
1499
1500
1501From 195cf9bd51203eb18cdff5542b27caf635e7b1cf Mon Sep 17 00:00:00 2001
1502From: Jason Garrett-Glaser <darkshikari@gmail.com>
1503Date: Sun, 26 Sep 2010 21:04:30 -0700
1504Subject: [PATCH 8/8] Add High 10 Intra profile support (AVC-Intra)
1505 x264 should now be able to encode compliant AVC-Intra 50.
1506 With a 10-bit-compiled version of x264, a sample commandline for 1080i25 might be:
1507 --interlaced --keyint 1 --vbv-bufsize 2000 --bitrate 50000 --vbv-maxrate 50000 --nal-hrd cbr
1508
1509Also print "Constrained Baseline" for baseline profile, since that's all x264 (and everything else in the world) supports.
1510Also reorganize parameter validation a bit to reduce some spurious warnings.
1511---
1512 encoder/encoder.c | 14 +++++++++-----
1513 encoder/set.c | 15 ++++++++++-----
1514 2 files changed, 19 insertions(+), 10 deletions(-)
1515
1516diff --git a/encoder/encoder.c b/encoder/encoder.c
1517index 62a4350..a1e8383 100644
1518--- a/encoder/encoder.c
1519+++ b/encoder/encoder.c
1520@@ -439,6 +439,13 @@ static int x264_validate_parameters( x264_t *h )
1521 if( h->i_thread_frames > 1 )
1522 h->param.nalu_process = NULL;
1523
1524+ h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
1525+ if( h->param.i_keyint_max == 1 )
1526+ {
1527+ h->param.b_intra_refresh = 0;
1528+ h->param.analyse.i_weighted_pred = 0;
1529+ }
1530+
1531 if( h->param.b_interlaced )
1532 {
1533 if( h->param.analyse.i_me_method >= X264_ME_ESA )
1534@@ -576,7 +583,6 @@ static int x264_validate_parameters( x264_t *h )
1535
1536 h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, X264_REF_MAX );
1537 h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, X264_REF_MAX );
1538- h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
1539 if( h->param.i_scenecut_threshold < 0 )
1540 h->param.i_scenecut_threshold = 0;
1541 if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
1542@@ -586,8 +592,6 @@ static int x264_validate_parameters( x264_t *h )
1543 }
1544 h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
1545 h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
1546- if( h->param.i_keyint_max == 1 )
1547- h->param.b_intra_refresh = 0;
1548 h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
1549 if( h->param.i_bframe <= 1 )
1550 h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
1551@@ -1155,10 +1159,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
1552 fclose( f );
1553 }
1554
1555- const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
1556+ const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" :
1557 h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
1558 h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
1559- h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
1560+ h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") :
1561 "High 4:4:4 Predictive";
1562 char level[4];
1563 snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
1564diff --git a/encoder/set.c b/encoder/set.c
1565index a003012..0a24bf7 100644
1566--- a/encoder/set.c
1567+++ b/encoder/set.c
1568@@ -121,17 +121,17 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
1569 sps->b_constraint_set1 = sps->i_profile_idc <= PROFILE_MAIN;
1570 /* Never set constraint_set2, it is not necessary and not used in real world. */
1571 sps->b_constraint_set2 = 0;
1572+ sps->b_constraint_set3 = 0;
1573
1574+ sps->i_level_idc = param->i_level_idc;
1575 if( param->i_level_idc == 9 && ( sps->i_profile_idc >= PROFILE_BASELINE && sps->i_profile_idc <= PROFILE_EXTENDED ) )
1576 {
1577 sps->b_constraint_set3 = 1; /* level 1b with Baseline, Main or Extended profile is signalled via constraint_set3 */
1578 sps->i_level_idc = 11;
1579 }
1580- else
1581- {
1582- sps->b_constraint_set3 = 0;
1583- sps->i_level_idc = param->i_level_idc;
1584- }
1585+ /* High 10 Intra profile */
1586+ if( param->i_keyint_max == 1 && sps->i_profile_idc == PROFILE_HIGH10 )
1587+ sps->b_constraint_set3 = 1;
1588
1589 sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
1590 /* extra slot with pyramid so that we don't have to override the
1591@@ -140,6 +140,11 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
1592 sps->i_num_ref_frames = X264_MIN(X264_REF_MAX, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
1593 param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
1594 sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
1595+ if( param->i_keyint_max == 1 )
1596+ {
1597+ sps->i_num_ref_frames = 0;
1598+ sps->vui.i_max_dec_frame_buffering = 0;
1599+ }
1600
1601 /* number of refs + current frame */
1602 int max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
1603--
16041.7.2.3