LHDjUG58

· 8 years ago · Jun 17, 2017, 09:32 PM
1From 5f104e9957cc4b69f7197fecf93648a0e2ae0e59 Mon Sep 17 00:00:00 2001
2From: Anton Mitrofanov <BugMaster@narod.ru>
3Date: Mon, 20 Sep 2010 13:10:13 +0400
4Subject: [PATCH 1/8] Fix DTS/bitrate calculation if the first PTS wasn't zero
5 Fix bitrate calculation with DTS compression.
6
7---
8 common/common.h   |    1 +
9 encoder/encoder.c |   11 +++++++----
10 x264.c            |   10 ++++++----
11 x264.h            |    2 +-
12 4 files changed, 15 insertions(+), 9 deletions(-)
13
14diff --git a/common/common.h b/common/common.h
15index efb25be..132cfee 100644
16--- a/common/common.h
17+++ b/common/common.h
18@@ -499,6 +499,7 @@ struct x264_t
19         int i_delay;    /* Number of frames buffered for B reordering */
20         int     i_bframe_delay;
21         int64_t i_bframe_delay_time;
22+        int64_t i_first_pts;
23         int64_t i_init_delta;
24         int64_t i_prev_reordered_pts[2];
25         int64_t i_largest_pts;
26diff --git a/encoder/encoder.c b/encoder/encoder.c
27index fa4401b..2b679a0 100644
28--- a/encoder/encoder.c
29+++ b/encoder/encoder.c
30@@ -2329,8 +2329,10 @@ int     x264_encoder_encode( x264_t *h,
31 
32         fenc->i_frame = h->frames.i_input++;
33 
34+        if( fenc->i_frame == 0 )
35+            h->frames.i_first_pts = fenc->i_pts;
36         if( h->frames.i_bframe_delay && fenc->i_frame == h->frames.i_bframe_delay )
37-            h->frames.i_bframe_delay_time = fenc->i_pts;
38+            h->frames.i_bframe_delay_time = fenc->i_pts - h->frames.i_first_pts;
39 
40         if( h->param.b_vfr_input && fenc->i_pts <= h->frames.i_largest_pts )
41             x264_log( h, X264_LOG_WARNING, "non-strictly-monotonic PTS\n" );
42@@ -2495,8 +2497,8 @@ int     x264_encoder_encode( x264_t *h,
43             {
44                 /* DTS compression */
45                 if( h->i_frame == 1 )
46-                    thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
47-                h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
48+                    thread_current->frames.i_init_delta = (h->fenc->i_reordered_pts - h->frames.i_first_pts) * h->i_dts_compress_multiplier;
49+                h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier + h->frames.i_first_pts * h->i_dts_compress_multiplier;
50             }
51         }
52         else
53@@ -3110,7 +3112,8 @@ void    x264_encoder_close  ( x264_t *h )
54             f_bitrate = fps * SUM3(h->stat.i_frame_size) / i_count / 125;
55         else
56         {
57-            float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts) * h->param.i_timebase_num / h->param.i_timebase_den;
58+            float duration = (float)(2 * h->frames.i_largest_pts - h->frames.i_second_largest_pts - h->frames.i_first_pts)
59+                           * h->i_dts_compress_multiplier * h->param.i_timebase_num / h->param.i_timebase_den;
60             f_bitrate = SUM3(h->stat.i_frame_size) / duration / 125;
61         }
62 
63diff --git a/x264.c b/x264.c
64index a1e7147..f74f096 100644
65--- a/x264.c
66+++ b/x264.c
67@@ -1584,7 +1584,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
68     int64_t second_largest_pts = -1;
69     int64_t ticks_per_frame;
70     double  duration;
71-    int     prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
72+    int     prev_timebase_den;
73     int     dts_compress_multiplier;
74     double  pulldown_pts = 0;
75 
76@@ -1603,6 +1603,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
77         param->i_timebase_den = param->i_fps_num * pulldown->fps_factor;
78     }
79 
80+    prev_timebase_den = param->i_timebase_den / gcd( param->i_timebase_num, param->i_timebase_den );
81+
82     if( ( h = x264_encoder_open( param ) ) == NULL )
83     {
84         x264_cli_log( "x264", X264_LOG_ERROR, "x264_encoder_open failed\n" );
85@@ -1727,6 +1729,8 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
86     if( pts_warning_cnt >= MAX_PTS_WARNING && cli_log_level < X264_LOG_DEBUG )
87         x264_cli_log( "x264", X264_LOG_WARNING, "%d suppressed nonmonotonic pts warnings\n", pts_warning_cnt-MAX_PTS_WARNING );
88 
89+    largest_pts *= dts_compress_multiplier;
90+    second_largest_pts *= dts_compress_multiplier;
91     /* duration algorithm fails when only 1 frame is output */
92     if( i_frame_output == 1 )
93         duration = (double)param->i_fps_den / param->i_fps_num;
94@@ -1734,8 +1738,6 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
95         duration = (double)(2 * last_dts - prev_dts - first_dts) * param->i_timebase_num / param->i_timebase_den;
96     else
97         duration = (double)(2 * largest_pts - second_largest_pts) * param->i_timebase_num / param->i_timebase_den;
98-    if( !(opt->i_pulldown && !param->b_vfr_input) )
99-        duration *= dts_compress_multiplier;
100 
101     i_end = x264_mdate();
102     /* Erase progress indicator before printing encoding stats. */
103@@ -1754,7 +1756,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
104     }
105 
106     filter.free( opt->hin );
107-    output.close_file( opt->hout, largest_pts * dts_compress_multiplier, second_largest_pts * dts_compress_multiplier );
108+    output.close_file( opt->hout, largest_pts, second_largest_pts );
109 
110     if( i_frame_output > 0 )
111     {
112diff --git a/x264.h b/x264.h
113index 8174015..71f5f55 100644
114--- a/x264.h
115+++ b/x264.h
116@@ -653,7 +653,7 @@ typedef struct
117     int     b_keyframe;
118     /* In: user pts, Out: pts of encoded picture (user)*/
119     int64_t i_pts;
120-    /* Out: frame dts. Since the pts of the first frame is always zero,
121+    /* Out: frame dts. When the pts of the first frame is close to zero,
122      *      initial frames may have a negative dts which must be dealt with by any muxer */
123     int64_t i_dts;
124     /* In: custom encoding parameters to be set from this frame forwards
125-- 
1261.7.2.3
127
128
129From 5cba26f757ec00a7b95656615813e692685ee138 Mon Sep 17 00:00:00 2001
130From: Anton Mitrofanov <BugMaster@narod.ru>
131Date: Sat, 25 Sep 2010 15:55:32 -0700
132Subject: [PATCH 2/8] Fix CFR ratecontrol with timebase != 1/fps
133 Fixes VBV + DTS compression, among other things.
134
135---
136 encoder/encoder.c   |    2 +-
137 encoder/slicetype.c |    4 ++--
138 x264.c              |    1 +
139 x264.h              |    4 +++-
140 4 files changed, 7 insertions(+), 4 deletions(-)
141
142diff --git a/encoder/encoder.c b/encoder/encoder.c
143index 2b679a0..3570776 100644
144--- a/encoder/encoder.c
145+++ b/encoder/encoder.c
146@@ -626,7 +626,7 @@ static int x264_validate_parameters( x264_t *h )
147         h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, X264_MAX( h->param.i_keyint_max, bufsize*fps ) );
148     }
149 
150-    if( !h->param.i_timebase_num || !h->param.i_timebase_den )
151+    if( !h->param.i_timebase_num || !h->param.i_timebase_den || !(h->param.b_vfr_input || h->param.b_pulldown) )
152     {
153         h->param.i_timebase_num = h->param.i_fps_den;
154         h->param.i_timebase_den = h->param.i_fps_num;
155diff --git a/encoder/slicetype.c b/encoder/slicetype.c
156index 0d87908..d08cf02 100644
157--- a/encoder/slicetype.c
158+++ b/encoder/slicetype.c
159@@ -1260,12 +1260,12 @@ void x264_slicetype_decide( x264_t *h )
160             if( h->param.b_vfr_input )
161             {
162                 if( lookahead_size-- > 1 )
163-                    h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts);
164+                    h->lookahead->next.list[i]->i_duration = 2 * (h->lookahead->next.list[i+1]->i_pts - h->lookahead->next.list[i]->i_pts) * h->i_dts_compress_multiplier;
165                 else
166                     h->lookahead->next.list[i]->i_duration = h->i_prev_duration;
167             }
168             else
169-                h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct];
170+                h->lookahead->next.list[i]->i_duration = delta_tfi_divisor[h->lookahead->next.list[i]->i_pic_struct] * h->i_dts_compress_multiplier;
171             h->i_prev_duration = h->lookahead->next.list[i]->i_duration;
172 
173             if( h->lookahead->next.list[i]->i_frame > h->i_disp_fields_last_frame && lookahead_size > 0 )
174diff --git a/x264.c b/x264.c
175index f74f096..5bd2af7 100644
176--- a/x264.c
177+++ b/x264.c
178@@ -1595,6 +1595,7 @@ static int  Encode( x264_param_t *param, cli_opt_t *opt )
179     /* set up pulldown */
180     if( opt->i_pulldown && !param->b_vfr_input )
181     {
182+        param->b_pulldown = 1;
183         param->b_pic_struct = 1;
184         pulldown = &pulldown_values[opt->i_pulldown];
185         param->i_timebase_num = param->i_fps_den;
186diff --git a/x264.h b/x264.h
187index 71f5f55..56d424c 100644
188--- a/x264.h
189+++ b/x264.h
190@@ -383,7 +383,9 @@ typedef struct x264_param_t
191     int b_annexb;               /* if set, place start codes (4 bytes) before NAL units,
192                                  * otherwise place size (4 bytes) before NAL units. */
193     int i_sps_id;               /* SPS and PPS id number */
194-    int b_vfr_input;            /* VFR input */
195+    int b_vfr_input;            /* VFR input.  If 1, use timebase and timestamps for ratecontrol purposes.
196+                                 * If 0, use fps only. */
197+    int b_pulldown;             /* use explicitly set timebase for CFR */
198     uint32_t i_fps_num;
199     uint32_t i_fps_den;
200     uint32_t i_timebase_num;    /* Timebase numerator */
201-- 
2021.7.2.3
203
204
205From 54073becc7cfc3a1b574d954d1017cd58cbe8b2a Mon Sep 17 00:00:00 2001
206From: Jason Garrett-Glaser <darkshikari@gmail.com>
207Date: Mon, 27 Sep 2010 05:39:13 -0700
208Subject: [PATCH 3/8] Add missing emms for dump-yuv
209
210---
211 encoder/encoder.c |    3 +++
212 1 files changed, 3 insertions(+), 0 deletions(-)
213
214diff --git a/encoder/encoder.c b/encoder/encoder.c
215index 3570776..7f3d5bc 100644
216--- a/encoder/encoder.c
217+++ b/encoder/encoder.c
218@@ -2934,7 +2934,10 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
219          }
220 
221     if( h->param.psz_dump_yuv )
222+    {
223         x264_frame_dump( h );
224+        x264_emms();
225+    }
226 
227     return frame_size;
228 }
229-- 
2301.7.2.3
231
232
233From 991166c935516f19f65485ebb447f079aca41e0e Mon Sep 17 00:00:00 2001
234From: Jason Garrett-Glaser <darkshikari@gmail.com>
235Date: Tue, 21 Sep 2010 17:11:00 -0700
236Subject: [PATCH 4/8] Make slice-max-size more aggressive in considering escape bytes
237 The x264 assumption of randomly distributed escape bytes fails in the case of CABAC + an enormous number of identical macroblocks.
238 This patch attempts to compensate for this.
239 It is probably safe to assume in calling applications that x264 practically never violates the slice size limitation.
240
241---
242 encoder/encoder.c |   63 +++++++++++++++++++++++++++++++++-------------------
243 1 files changed, 40 insertions(+), 23 deletions(-)
244
245diff --git a/encoder/encoder.c b/encoder/encoder.c
246index 7f3d5bc..b9e66ac 100644
247--- a/encoder/encoder.c
248+++ b/encoder/encoder.c
249@@ -1834,10 +1834,12 @@ static int x264_slice_write( x264_t *h )
250     uint8_t cabac_prevbyte_bak = 0; /* Shut up GCC. */
251     int mv_bits_bak = 0;
252     int tex_bits_bak = 0;
253-    /* Assume no more than 3 bytes of NALU escaping.
254-     * NALUs other than the first use a 3-byte startcode. */
255-    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 3;
256-    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : INT_MAX;
257+    /* NALUs other than the first use a 3-byte startcode.
258+     * Add one extra byte for the rbsp, and one more for the final CABAC putbyte.
259+     * Then add an extra 5 bytes just in case, to account for random NAL escapes and
260+     * other inaccuracies. */
261+    int overhead_guess = (NALU_OVERHEAD - (h->param.b_annexb && h->out.i_nal)) + 1 + h->param.b_cabac + 5;
262+    int slice_max_size = h->param.i_slice_max_size > 0 ? (h->param.i_slice_max_size-overhead_guess)*8 : 0;
263     int starting_bits = bs_pos(&h->out.bs);
264     int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
265     int b_hpel = h->fdec->b_kept_as_ref;
266@@ -1884,7 +1886,7 @@ static int x264_slice_write( x264_t *h )
267         if( x264_bitstream_check_buffer( h ) )
268             return -1;
269 
270-        if( h->param.i_slice_max_size > 0 )
271+        if( slice_max_size )
272         {
273             mv_bits_bak = h->stat.frame.i_mv_bits;
274             tex_bits_bak = h->stat.frame.i_tex_bits;
275@@ -1948,35 +1950,50 @@ static int x264_slice_write( x264_t *h )
276         int total_bits = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
277         int mb_size = total_bits - mb_spos;
278 
279-        /* We'll just re-encode this last macroblock if we go over the max slice size. */
280-        if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
281-        {
282-            if( mb_xy != h->sh.i_first_mb )
283+        if( slice_max_size )
284+        {
285+            /* Count the skip run, just in case. */
286+            if( !h->param.b_cabac )
287+                total_bits += bs_size_ue_big( i_skip );
288+            /* HACK: we assume no more than 3 bytes of NALU escaping, but
289+             * this can fail in CABAC streams with an extremely large number of identical
290+             * blocks in sequence (e.g. all-black intra blocks).
291+             * Thus, every 64 blocks, pretend we've used a byte.
292+             * For reference, a sequence of identical empty-CBP i16x16 blocks will use
293+             * one byte after 26 macroblocks, assuming a perfectly adapted CABAC.
294+             * That's 78 macroblocks to generate the 3-byte sequence to trigger an escape. */
295+            else if( ((mb_xy - h->sh.i_first_mb) & 63) == 63 )
296+                slice_max_size -= 8;
297+            /* We'll just re-encode this last macroblock if we go over the max slice size. */
298+            if( total_bits - starting_bits > slice_max_size && !h->mb.b_reencode_mb )
299             {
300-                h->stat.frame.i_mv_bits = mv_bits_bak;
301-                h->stat.frame.i_tex_bits = tex_bits_bak;
302-                if( h->param.b_cabac )
303+                if( mb_xy != h->sh.i_first_mb )
304                 {
305-                    memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
306-                    h->cabac.p[-1] = cabac_prevbyte_bak;
307+                    h->stat.frame.i_mv_bits = mv_bits_bak;
308+                    h->stat.frame.i_tex_bits = tex_bits_bak;
309+                    if( h->param.b_cabac )
310+                    {
311+                        memcpy( &h->cabac, &cabac_bak, offsetof(x264_cabac_t, f8_bits_encoded) );
312+                        h->cabac.p[-1] = cabac_prevbyte_bak;
313+                    }
314+                    else
315+                    {
316+                        h->out.bs = bs_bak;
317+                        i_skip = i_skip_bak;
318+                    }
319+                    h->mb.b_reencode_mb = 1;
320+                    h->sh.i_last_mb = mb_xy-1;
321+                    break;
322                 }
323                 else
324                 {
325-                    h->out.bs = bs_bak;
326-                    i_skip = i_skip_bak;
327+                    h->sh.i_last_mb = mb_xy;
328+                    h->mb.b_reencode_mb = 0;
329                 }
330-                h->mb.b_reencode_mb = 1;
331-                h->sh.i_last_mb = mb_xy-1;
332-                break;
333             }
334             else
335-            {
336-                h->sh.i_last_mb = mb_xy;
337                 h->mb.b_reencode_mb = 0;
338-            }
339         }
340-        else
341-            h->mb.b_reencode_mb = 0;
342 
343 #if HAVE_VISUALIZE
344         if( h->param.b_visualize )
345-- 
3461.7.2.3
347
348
349From 3f15d8ea707d4985a38059fe58ce0a0993ceeb94 Mon Sep 17 00:00:00 2001
350From: Jason Garrett-Glaser <darkshikari@gmail.com>
351Date: Mon, 27 Sep 2010 05:39:02 -0700
352Subject: [PATCH 5/8] Various cosmetics
353
354---
355 encoder/encoder.c |   12 ++++++------
356 encoder/set.c     |   17 ++---------------
357 2 files changed, 8 insertions(+), 21 deletions(-)
358
359diff --git a/encoder/encoder.c b/encoder/encoder.c
360index b9e66ac..28ded05 100644
361--- a/encoder/encoder.c
362+++ b/encoder/encoder.c
363@@ -2943,12 +2943,12 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
364 
365     /* Remove duplicates, must be done near the end as breaks h->fref0 array
366      * by freeing some of its pointers. */
367-     for( int i = 0; i < h->i_ref0; i++ )
368-         if( h->fref0[i] && h->fref0[i]->b_duplicate )
369-         {
370-             x264_frame_push_blank_unused( h, h->fref0[i] );
371-             h->fref0[i] = 0;
372-         }
373+    for( int i = 0; i < h->i_ref0; i++ )
374+        if( h->fref0[i] && h->fref0[i]->b_duplicate )
375+        {
376+            x264_frame_push_blank_unused( h, h->fref0[i] );
377+            h->fref0[i] = 0;
378+        }
379 
380     if( h->param.psz_dump_yuv )
381     {
382diff --git a/encoder/set.c b/encoder/set.c
383index b0d2149..a003012 100644
384--- a/encoder/set.c
385+++ b/encoder/set.c
386@@ -294,34 +294,21 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
387     }
388     else if( sps->i_poc_type == 1 )
389     {
390-        int i;
391-
392         bs_write( s, 1, sps->b_delta_pic_order_always_zero );
393         bs_write_se( s, sps->i_offset_for_non_ref_pic );
394         bs_write_se( s, sps->i_offset_for_top_to_bottom_field );
395         bs_write_ue( s, sps->i_num_ref_frames_in_poc_cycle );
396 
397-        for( i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
398-        {
399+        for( int i = 0; i < sps->i_num_ref_frames_in_poc_cycle; i++ )
400             bs_write_se( s, sps->i_offset_for_ref_frame[i] );
401-        }
402     }
403     bs_write_ue( s, sps->i_num_ref_frames );
404     bs_write( s, 1, sps->b_gaps_in_frame_num_value_allowed );
405     bs_write_ue( s, sps->i_mb_width - 1 );
406-    if (sps->b_frame_mbs_only)
407-    {
408-        bs_write_ue( s, sps->i_mb_height - 1);
409-    }
410-    else // interlaced
411-    {
412-        bs_write_ue( s, sps->i_mb_height/2 - 1);
413-    }
414+    bs_write_ue( s, (sps->i_mb_height >> !sps->b_frame_mbs_only) - 1);
415     bs_write( s, 1, sps->b_frame_mbs_only );
416     if( !sps->b_frame_mbs_only )
417-    {
418         bs_write( s, 1, sps->b_mb_adaptive_frame_field );
419-    }
420     bs_write( s, 1, sps->b_direct8x8_inference );
421 
422     bs_write( s, 1, sps->b_crop );
423-- 
4241.7.2.3
425
426
427From 377efcd2643ba657a6d26c4599a9cc4022ca84e8 Mon Sep 17 00:00:00 2001
428From: Alex Wright <alexw0885@gmail.com>
429Date: Sun, 19 Sep 2010 05:08:22 -0700
430Subject: [PATCH 6/8] Chroma mode decision/subpel for B-frames
431 Improves compression ~0.4-1%. Helps more on videos with lots of chroma detail.
432 Enabled at subme 9 (preset slower) and higher.
433
434---
435 common/macroblock.c |    5 +-
436 encoder/analyse.c   |  118 +++++++++++++++++++++++++++++++++++++++++++++++++--
437 2 files changed, 117 insertions(+), 6 deletions(-)
438
439diff --git a/common/macroblock.c b/common/macroblock.c
440index b6c91d6..7f0348e 100644
441--- a/common/macroblock.c
442+++ b/common/macroblock.c
443@@ -448,8 +448,9 @@ void x264_macroblock_thread_init( x264_t *h )
444     h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
445     if( h->sh.i_type == SLICE_TYPE_B && (h->mb.i_subpel_refine == 6 || h->mb.i_subpel_refine == 8) )
446         h->mb.i_subpel_refine--;
447-    h->mb.b_chroma_me = h->param.analyse.b_chroma_me && h->sh.i_type == SLICE_TYPE_P
448-                        && h->mb.i_subpel_refine >= 5;
449+    h->mb.b_chroma_me = h->param.analyse.b_chroma_me &&
450+                        ((h->sh.i_type == SLICE_TYPE_P && h->mb.i_subpel_refine >= 5) ||
451+                         (h->sh.i_type == SLICE_TYPE_B && h->mb.i_subpel_refine >= 9));
452     h->mb.b_dct_decimate = h->sh.i_type == SLICE_TYPE_B ||
453                           (h->param.analyse.b_dct_decimate && h->sh.i_type != SLICE_TYPE_I);
454 
455diff --git a/encoder/analyse.c b/encoder/analyse.c
456index c4162e9..6ed13ba 100644
457--- a/encoder/analyse.c
458+++ b/encoder/analyse.c
459@@ -1679,6 +1679,37 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
460         a->l0.i_cost4x8[i8x8] += x264_mb_analyse_inter_p4x4_chroma( h, a, p_fref, i8x8, PIXEL_4x8 );
461 }
462 
463+static ALWAYS_INLINE int x264_analyse_bi_chroma( x264_t *h, x264_mb_analysis_t *a, int idx, int i_pixel )
464+{
465+    ALIGNED_ARRAY_8( pixel, pix, [4],[8*8] );
466+    ALIGNED_ARRAY_8( pixel,  bi, [2],[8*8] );
467+    int l0_mvy_offset, l1_mvy_offset;
468+    int i_chroma_cost = 0;
469+
470+#define COST_BI_CHROMA( m0, m1, width, height ) \
471+{ \
472+    l0_mvy_offset = h->mb.b_interlaced & m0.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
473+    l1_mvy_offset = h->mb.b_interlaced & m1.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0; \
474+    h->mc.mc_chroma( pix[0], pix[1], 8, m0.p_fref[4], m0.i_stride[1], m0.mv[0], m0.mv[1] + l0_mvy_offset, width, height ); \
475+    h->mc.mc_chroma( pix[2], pix[3], 8, m1.p_fref[4], m1.i_stride[1], m1.mv[0], m1.mv[1] + l1_mvy_offset, width, height ); \
476+    h->mc.avg[i_pixel+3]( bi[0], 8, pix[0], 8, pix[2], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
477+    h->mc.avg[i_pixel+3]( bi[1], 8, pix[1], 8, pix[3], 8, h->mb.bipred_weight[m0.i_ref][m1.i_ref] ); \
478+    i_chroma_cost  = h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[1], FENC_STRIDE, bi[0], 8 ); \
479+    i_chroma_cost += h->pixf.mbcmp[i_pixel+3]( m0.p_fenc[2], FENC_STRIDE, bi[1], 8 ); \
480+}
481+
482+    if( i_pixel == PIXEL_16x16 )
483+        COST_BI_CHROMA( a->l0.bi16x16, a->l1.bi16x16, 8, 8 )
484+    else if( i_pixel == PIXEL_16x8 )
485+        COST_BI_CHROMA( a->l0.me16x8[idx], a->l1.me16x8[idx], 8, 4 )
486+    else if( i_pixel == PIXEL_8x16 )
487+        COST_BI_CHROMA( a->l0.me8x16[idx], a->l1.me8x16[idx], 4, 8 )
488+    else
489+        COST_BI_CHROMA( a->l0.me8x8[idx], a->l1.me8x8[idx], 4, 4 )
490+
491+    return i_chroma_cost;
492+}
493+
494 static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
495 {
496     /* Assumes that fdec still contains the results of
497@@ -1693,15 +1724,29 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
498         {
499             const int x = (i&1)*8;
500             const int y = (i>>1)*8;
501-            a->i_cost16x16direct +=
502-            a->i_cost8x8direct[i] =
503-                h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE, &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
504+            a->i_cost8x8direct[i] = h->pixf.mbcmp[PIXEL_8x8]( &p_fenc[x+y*FENC_STRIDE], FENC_STRIDE,
505+                                                              &p_fdec[x+y*FDEC_STRIDE], FDEC_STRIDE );
506+            if( h->mb.b_chroma_me )
507+            {
508+                a->i_cost8x8direct[i] += h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[1][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
509+                                                                   &h->mb.pic.p_fdec[1][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE )
510+                                      +  h->pixf.mbcmp[PIXEL_4x4]( &h->mb.pic.p_fenc[2][(x>>1)+(y>>1)*FENC_STRIDE], FENC_STRIDE,
511+                                                                   &h->mb.pic.p_fdec[2][(x>>1)+(y>>1)*FDEC_STRIDE], FDEC_STRIDE );
512+            }
513+            a->i_cost16x16direct += a->i_cost8x8direct[i];
514 
515             /* mb type cost */
516             a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8];
517         }
518     else
519+    {
520         a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_16x16]( p_fenc, FENC_STRIDE, p_fdec, FDEC_STRIDE );
521+        if( h->mb.b_chroma_me )
522+        {
523+            a->i_cost16x16direct += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE )
524+                                 +  h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE );
525+        }
526+    }
527 }
528 
529 static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
530@@ -1807,6 +1852,9 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
531                      + a->l0.bi16x16.cost_mv
532                      + a->l1.bi16x16.cost_mv;
533 
534+    if( h->mb.b_chroma_me )
535+        a->i_cost16x16bi += x264_analyse_bi_chroma( h, a, 0, PIXEL_16x16 );
536+
537     /* Always try the 0,0,0,0 vector; helps avoid errant motion vectors in fades */
538     if( M32( a->l0.bi16x16.mv ) | M32( a->l1.bi16x16.mv ) )
539     {
540@@ -1819,6 +1867,39 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
541                                 h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
542         int cost00 = h->pixf.mbcmp[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE, pix0, 16 )
543                    + ref_costs + l0_mv_cost + l1_mv_cost;
544+
545+        if( h->mb.b_chroma_me )
546+        {
547+            ALIGNED_ARRAY_8( pixel, pixuv, [2],[8*FENC_STRIDE] );
548+            ALIGNED_ARRAY_8( pixel, bi, [8*FENC_STRIDE] );
549+
550+            if( h->mb.b_interlaced & a->l0.bi16x16.i_ref )
551+            {
552+                int l0_mvy_offset = h->mb.b_interlaced & a->l0.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
553+                h->mc.mc_chroma( pixuv[0], pixuv[0]+8, FENC_STRIDE, h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4],
554+                                 h->mb.pic.i_stride[1], 0, 0 + l0_mvy_offset, 8, 8 );
555+            }
556+            else
557+                h->mc.load_deinterleave_8x8x2_fenc( pixuv[0], h->mb.pic.p_fref[0][a->l0.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
558+
559+            if( h->mb.b_interlaced & a->l1.bi16x16.i_ref )
560+            {
561+                int l1_mvy_offset = h->mb.b_interlaced & a->l1.bi16x16.i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
562+                h->mc.mc_chroma( pixuv[1], pixuv[1]+8, FENC_STRIDE, h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4],
563+                                 h->mb.pic.i_stride[1], 0, 0 + l1_mvy_offset, 8, 8 );
564+            }
565+            else
566+                h->mc.load_deinterleave_8x8x2_fenc( pixuv[1], h->mb.pic.p_fref[1][a->l1.bi16x16.i_ref][4], h->mb.pic.i_stride[1] );
567+
568+            h->mc.avg[PIXEL_8x8]( bi, FENC_STRIDE, pixuv[0], FENC_STRIDE, pixuv[1], FENC_STRIDE,
569+                                  h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
570+            h->mc.avg[PIXEL_8x8]( bi+8, FENC_STRIDE, pixuv[0]+8, FENC_STRIDE, pixuv[1]+8, FENC_STRIDE,
571+                                  h->mb.bipred_weight[a->l0.bi16x16.i_ref][a->l1.bi16x16.i_ref] );
572+
573+            cost00 += h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE, bi, FENC_STRIDE )
574+                   +  h->pixf.mbcmp[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE, bi+8, FENC_STRIDE );
575+        }
576+
577         if( cost00 < a->i_cost16x16bi )
578         {
579             M32( a->l0.bi16x16.mv ) = 0;
580@@ -2017,6 +2098,13 @@ static void x264_mb_analyse_inter_b8x8_mixed_ref( x264_t *h, x264_mb_analysis_t
581                          + a->l0.me8x8[i].i_ref_cost + a->l1.me8x8[i].i_ref_cost
582                          + a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
583 
584+        if( h->mb.b_chroma_me )
585+        {
586+            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
587+            i_part_cost_bi += i_chroma_cost;
588+            a->i_satd8x8[2][i] += i_chroma_cost;
589+        }
590+
591         a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
592         a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
593 
594@@ -2090,6 +2178,13 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
595         a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
596         a->l1.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
597 
598+        if( h->mb.b_chroma_me )
599+        {
600+            int i_chroma_cost = x264_analyse_bi_chroma( h, a, i, PIXEL_8x8 );
601+            i_part_cost_bi += i_chroma_cost;
602+            a->i_satd8x8[2][i] += i_chroma_cost;
603+        }
604+
605         i_part_cost = a->l0.me8x8[i].cost;
606         h->mb.i_sub_partition[i] = D_L0_8x8;
607         COPY2_IF_LT( i_part_cost, a->l1.me8x8[i].cost, h->mb.i_sub_partition[i], D_L1_8x8 );
608@@ -2162,6 +2257,9 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i
609                         + a->l0.me16x8[i].cost_mv + a->l1.me16x8[i].cost_mv + a->l0.me16x8[i].i_ref_cost
610                         + a->l1.me16x8[i].i_ref_cost;
611 
612+        if( h->mb.b_chroma_me )
613+            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_16x8 );
614+
615         i_part_cost = a->l0.me16x8[i].cost;
616         a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
617 
618@@ -2252,6 +2350,9 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i
619                         + a->l0.me8x16[i].cost_mv + a->l1.me8x16[i].cost_mv + a->l0.me8x16[i].i_ref_cost
620                         + a->l1.me8x16[i].i_ref_cost;
621 
622+        if( h->mb.b_chroma_me )
623+            i_part_cost_bi += x264_analyse_bi_chroma( h, a, i, PIXEL_8x16 );
624+
625         i_part_cost = a->l0.me8x16[i].cost;
626         a->i_mb_partition8x16[i] = D_L0_8x8;
627 
628@@ -3249,7 +3350,16 @@ intra_analysis:
629                 h->mb.i_partition = i_partition;
630             }
631 
632-            x264_mb_analyse_intra( h, &analysis, i_satd_inter );
633+            if( h->mb.b_chroma_me )
634+            {
635+                x264_mb_analyse_intra_chroma( h, &analysis );
636+                x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_i8x8chroma );
637+                analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
638+                analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
639+                analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
640+            }
641+            else
642+                x264_mb_analyse_intra( h, &analysis, i_satd_inter );
643 
644             if( analysis.i_mbrd )
645             {
646-- 
6471.7.2.3
648
649
650From 594ff9664e7ac57a53fae8d9b5a1ecdf2ba5fa75 Mon Sep 17 00:00:00 2001
651From: Oskar Arvidsson <oskar@irock.se>
652Date: Mon, 27 Sep 2010 16:02:20 +0200
653Subject: [PATCH 7/8] Finish support for high-depth video throughout x264
654 Add support for high depth input in libx264.
655 Add support for 16-bit colorspaces in the filtering system.
656 Add support for input bit depths in the interval [9,16] with the raw demuxer.
657 Add a depth filter to dither input to x264.
658
659---
660 Makefile                 |    2 +-
661 common/common.c          |   17 ++--
662 common/frame.c           |   14 +++
663 common/mc.c              |   11 +--
664 encoder/encoder.c        |    8 +-
665 filters/video/crop.c     |    8 +-
666 filters/video/depth.c    |  228 ++++++++++++++++++++++++++++++++++++++++++++++
667 filters/video/internal.c |    1 +
668 filters/video/resize.c   |   84 +++++++++++++----
669 filters/video/video.c    |    1 +
670 input/input.c            |   13 ++-
671 input/input.h            |    7 +-
672 input/raw.c              |   33 ++++++-
673 x264.c                   |   21 ++++-
674 x264.h                   |   14 +++-
675 15 files changed, 405 insertions(+), 57 deletions(-)
676 create mode 100644 filters/video/depth.c
677
678diff --git a/Makefile b/Makefile
679index bab55e5..0cd7b82 100644
680--- a/Makefile
681+++ b/Makefile
682@@ -18,7 +18,7 @@ SRCCLI = x264.c input/input.c input/timecode.c input/raw.c input/y4m.c \
683          output/flv.c output/flv_bytestream.c filters/filters.c \
684          filters/video/video.c filters/video/source.c filters/video/internal.c \
685          filters/video/resize.c filters/video/cache.c filters/video/fix_vfr_pts.c \
686-         filters/video/select_every.c filters/video/crop.c
687+         filters/video/select_every.c filters/video/crop.c filters/video/depth.c
688 
689 SRCSO =
690 
691diff --git a/common/common.c b/common/common.c
692index c329cb0..aff5fc3 100644
693--- a/common/common.c
694+++ b/common/common.c
695@@ -33,6 +33,8 @@
696 #include <malloc.h>
697 #endif
698 
699+const int x264_bit_depth = BIT_DEPTH;
700+
701 static void x264_log_default( void *, int, const char *, va_list );
702 
703 /****************************************************************************
704@@ -1047,19 +1049,20 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
705     x264_picture_init( pic );
706     pic->img.i_csp = i_csp;
707     pic->img.i_plane = csp == X264_CSP_NV12 ? 2 : 3;
708-    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 );
709+    int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
710+    pic->img.plane[0] = x264_malloc( 3 * i_width * i_height / 2 * depth_factor );
711     if( !pic->img.plane[0] )
712         return -1;
713-    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height;
714+    pic->img.plane[1] = pic->img.plane[0] + i_width * i_height * depth_factor;
715     if( csp != X264_CSP_NV12 )
716-        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4;
717-    pic->img.i_stride[0] = i_width;
718+        pic->img.plane[2] = pic->img.plane[1] + i_width * i_height / 4 * depth_factor;
719+    pic->img.i_stride[0] = i_width * depth_factor;
720     if( csp == X264_CSP_NV12 )
721-        pic->img.i_stride[1] = i_width;
722+        pic->img.i_stride[1] = i_width * depth_factor;
723     else
724     {
725-        pic->img.i_stride[1] = i_width / 2;
726-        pic->img.i_stride[2] = i_width / 2;
727+        pic->img.i_stride[1] = i_width / 2 * depth_factor;
728+        pic->img.i_stride[2] = i_width / 2 * depth_factor;
729     }
730     return 0;
731 }
732diff --git a/common/frame.c b/common/frame.c
733index 0c3d77f..95666da 100644
734--- a/common/frame.c
735+++ b/common/frame.c
736@@ -263,6 +263,20 @@ int x264_frame_copy_picture( x264_t *h, x264_frame_t *dst, x264_picture_t *src )
737         return -1;
738     }
739 
740+#if X264_HIGH_BIT_DEPTH
741+    if( !(src->img.i_csp & X264_CSP_HIGH_DEPTH) )
742+    {
743+        x264_log( h, X264_LOG_ERROR, "This build of x264 requires high depth input. Rebuild to support 8-bit input.\n" );
744+        return -1;
745+    }
746+#else
747+    if( src->img.i_csp & X264_CSP_HIGH_DEPTH )
748+    {
749+        x264_log( h, X264_LOG_ERROR, "This build of x264 requires 8-bit input. Rebuild to support high depth input.\n" );
750+        return -1;
751+    }
752+#endif
753+
754     dst->i_type     = src->i_type;
755     dst->i_qpplus1  = src->i_qpplus1;
756     dst->i_pts      = dst->i_reordered_pts = src->i_pts;
757diff --git a/common/mc.c b/common/mc.c
758index 5b58a76..acc2312 100644
759--- a/common/mc.c
760+++ b/common/mc.c
761@@ -302,12 +302,7 @@ void x264_plane_copy_c( pixel *dst, int i_dst,
762 {
763     while( h-- )
764     {
765-#if X264_HIGH_BIT_DEPTH
766-        for( int i = 0; i < w; i++ )
767-            dst[i] = src[i] << (BIT_DEPTH-8);
768-#else
769-        memcpy( dst, src, w );
770-#endif
771+        memcpy( dst, src, w * sizeof(pixel) );
772         dst += i_dst;
773         src += i_src;
774     }
775@@ -320,8 +315,8 @@ void x264_plane_copy_interleave_c( pixel *dst, int i_dst,
776     for( int y=0; y<h; y++, dst+=i_dst, srcu+=i_srcu, srcv+=i_srcv )
777         for( int x=0; x<w; x++ )
778         {
779-            dst[2*x]   = srcu[x] << (BIT_DEPTH-8);
780-            dst[2*x+1] = srcv[x] << (BIT_DEPTH-8);
781+            dst[2*x]   = ((pixel*)srcu)[x];
782+            dst[2*x+1] = ((pixel*)srcv)[x];
783         }
784 }
785 
786diff --git a/encoder/encoder.c b/encoder/encoder.c
787index 28ded05..62a4350 100644
788--- a/encoder/encoder.c
789+++ b/encoder/encoder.c
790@@ -2777,12 +2777,14 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
791         x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );
792 
793     pic_out->img.i_csp = X264_CSP_NV12;
794+#if X264_HIGH_BIT_DEPTH
795+    pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH;
796+#endif
797     pic_out->img.i_plane = h->fdec->i_plane;
798     for( int i = 0; i < 2; i++ )
799     {
800-        pic_out->img.i_stride[i] = h->fdec->i_stride[i];
801-        // FIXME This breaks the API when pixel != uint8_t.
802-        pic_out->img.plane[i] = h->fdec->plane[i];
803+        pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel);
804+        pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i];
805     }
806 
807     x264_frame_push_unused( thread_current, h->fenc );
808diff --git a/filters/video/crop.c b/filters/video/crop.c
809index 2a3c2b1..b70476e 100644
810--- a/filters/video/crop.c
811+++ b/filters/video/crop.c
812@@ -103,8 +103,12 @@ static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
813     output->img.height = h->dims[3];
814     /* shift the plane pointers down 'top' rows and right 'left' columns. */
815     for( int i = 0; i < output->img.planes; i++ )
816-        output->img.plane[i] += (int)(output->img.stride[i] * h->dims[1] * h->csp->height[i]
817-                                    + h->dims[0] * h->csp->width[i]);
818+    {
819+        /* stride[] is in bytes (x264_cli_pic_alloc scales it by the depth factor); only the column shift needs scaling */
820+        intptr_t offset = output->img.stride[i] * h->dims[1] * h->csp->height[i];
821+        offset += h->dims[0] * h->csp->width[i] * x264_cli_csp_depth_factor( output->img.csp );
822+        output->img.plane[i] += offset;
823+    }
824     return 0;
825 }
826 
827diff --git a/filters/video/depth.c b/filters/video/depth.c
828new file mode 100644
829index 0000000..a0411c5
830--- /dev/null
831+++ b/filters/video/depth.c
832@@ -0,0 +1,228 @@
833+/*****************************************************************************
834+ * depth.c: x264 video depth filter
835+ *****************************************************************************
836+ * Copyright (C) 2010 Oskar Arvidsson <oskar@irock.se>
837+ *
838+ * This program is free software; you can redistribute it and/or modify
839+ * it under the terms of the GNU General Public License as published by
840+ * the Free Software Foundation; either version 2 of the License, or
841+ * (at your option) any later version.
842+ *
843+ * This program is distributed in the hope that it will be useful,
844+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
845+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
846+ * GNU General Public License for more details.
847+ *
848+ * You should have received a copy of the GNU General Public License
849+ * along with this program; if not, write to the Free Software
850+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
851+ *****************************************************************************/
852+
853+#include "video.h"
854+#define NAME "depth"
855+#define FAIL_IF_ERROR( cond, ... ) FAIL_IF_ERR( cond, NAME, __VA_ARGS__ )
856+
857+cli_vid_filter_t depth_filter;
858+
859+typedef struct
860+{
861+    hnd_t prev_hnd;
862+    cli_vid_filter_t prev_filter;
863+
864+    int bit_depth;
865+    int dst_csp;
866+    cli_pic_t buffer;
867+    int16_t *error_buf;
868+} depth_hnd_t;
869+
870+static int depth_filter_csp_is_supported( int csp )
871+{
872+    int csp_mask = csp & X264_CSP_MASK;
873+    return csp_mask == X264_CSP_I420 ||
874+           csp_mask == X264_CSP_I422 ||
875+           csp_mask == X264_CSP_I444 ||
876+           csp_mask == X264_CSP_YV12 ||
877+           csp_mask == X264_CSP_NV12;
878+}
879+
880+static int csp_num_interleaved( int csp, int plane )
881+{
882+    int csp_mask = csp & X264_CSP_MASK;
883+    return ( csp_mask == X264_CSP_NV12 && plane == 1 ) ? 2 : 1;
884+}
885+
886+/* The dithering algorithm is based on Sierra-2-4A error diffusion. It has been
887+ * written in such a way so that if the source has been upconverted using the
888+ * same algorithm as used in scale_image, dithering down to the source bit
889+ * depth again is lossless. */
890+#define DITHER_PLANE( pitch ) \
891+static void dither_plane_##pitch( pixel *dst, int dst_stride, uint16_t *src, int src_stride, \
892+                                        int width, int height, int16_t *errors ) \
893+{ \
894+    const int lshift = 16-BIT_DEPTH; \
895+    const int rshift = 2*BIT_DEPTH-16; \
896+    const int pixel_max = (1 << BIT_DEPTH)-1; \
897+    const int half = 1 << (16-BIT_DEPTH); \
898+    memset( errors, 0, (width+1) * sizeof(int16_t) ); \
899+    for( int y = 0; y < height; y++, src += src_stride, dst += dst_stride ) \
900+    { \
901+        int err = 0; \
902+        for( int x = 0; x < width; x++ ) \
903+        { \
904+            err += errors[x] + errors[x+1]; \
905+            dst[x*pitch] = x264_clip3( (((src[x*pitch]+half)<<2)+err)*pixel_max >> 18, 0, pixel_max ); \
906+            errors[x] = src[x*pitch] - (dst[x*pitch] << lshift) - (dst[x*pitch] >> rshift); \
907+            err = errors[x] << 1; \
908+        } \
909+    } \
910+}
911+
912+DITHER_PLANE( 1 )
913+DITHER_PLANE( 2 )
914+
915+static void dither_image( cli_image_t *out, cli_image_t *img, int16_t *error_buf )
916+{
917+    int csp_mask = img->csp & X264_CSP_MASK;
918+    for( int i = 0; i < img->planes; i++ )
919+    {
920+        int num_interleaved = csp_num_interleaved( img->csp, i );
921+        int height = x264_cli_csps[csp_mask].height[i] * img->height;
922+        int width = x264_cli_csps[csp_mask].width[i] * img->width / num_interleaved;
923+
924+#define CALL_DITHER_PLANE( pitch, off ) \
925+        dither_plane_##pitch( ((pixel*)out->plane[i])+off, out->stride[i]/sizeof(pixel), \
926+                ((uint16_t*)img->plane[i])+off, img->stride[i]/2, width, height, error_buf )
927+
928+        if( num_interleaved == 1 )
929+        {
930+            CALL_DITHER_PLANE( 1, 0 );
931+        }
932+        else
933+        {
934+            CALL_DITHER_PLANE( 2, 0 );
935+            CALL_DITHER_PLANE( 2, 1 );
936+        }
937+    }
938+}
939+
940+static void scale_image( cli_image_t *output, cli_image_t *img )
941+{
942+    /* This function mimics how swscale does upconversion: 8-bit is converted
943+     * to 16-bit by left-shifting the original value by 8 and then adding the
944+     * original value to that. This keeps the full color range while also
945+     * being fast. For n-bit output (n = BIT_DEPTH) we do the same thing, but
946+     * discard the lower 16-n bits. */
947+    int csp_mask = img->csp & X264_CSP_MASK;
948+    const int shift = 16-BIT_DEPTH;
949+    for( int i = 0; i < img->planes; i++ )
950+    {
951+        uint8_t *src = img->plane[i];
952+        uint16_t *dst = (uint16_t*)output->plane[i];
953+        int height = x264_cli_csps[csp_mask].height[i] * img->height;
954+        int width = x264_cli_csps[csp_mask].width[i] * img->width;
955+
956+        for( int j = 0; j < height; j++ )
957+        {
958+            for( int k = 0; k < width; k++ )
959+                dst[k] = ((src[k] << 8) + src[k]) >> shift;
960+
961+            src += img->stride[i];
962+            dst += output->stride[i]/2;
963+        }
964+    }
965+}
966+
967+static int get_frame( hnd_t handle, cli_pic_t *output, int frame )
968+{
969+    depth_hnd_t *h = handle;
970+
971+    if( h->prev_filter.get_frame( h->prev_hnd, output, frame ) )
972+        return -1;
973+
974+    if( h->bit_depth < 16 && output->img.csp & X264_CSP_HIGH_DEPTH )
975+    {
976+        dither_image( &h->buffer.img, &output->img, h->error_buf );
977+        output->img = h->buffer.img;
978+    }
979+    else if( h->bit_depth > 8 && !(output->img.csp & X264_CSP_HIGH_DEPTH) )
980+    {
981+        scale_image( &h->buffer.img, &output->img );
982+        output->img = h->buffer.img;
983+    }
984+    return 0;
985+}
986+
987+static int release_frame( hnd_t handle, cli_pic_t *pic, int frame )
988+{
989+    depth_hnd_t *h = handle;
990+    return h->prev_filter.release_frame( h->prev_hnd, pic, frame );
991+}
992+
993+static void free_filter( hnd_t handle )
994+{
995+    depth_hnd_t *h = handle;
996+    h->prev_filter.free( h->prev_hnd );
997+    x264_cli_pic_clean( &h->buffer );
998+    x264_free( h );
999+}
1000+
1001+static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info,
1002+                 x264_param_t *param, char *opt_string )
1003+{
1004+    int ret = 0;
1005+    int change_fmt = (info->csp ^ param->i_csp) & X264_CSP_HIGH_DEPTH;
1006+    int csp = ~(~info->csp ^ change_fmt);
1007+    int bit_depth = 8*x264_cli_csp_depth_factor( csp );
1008+
1009+    if( opt_string )
1010+    {
1011+        static const char *optlist[] = { "bit_depth", NULL };
1012+        char **opts = x264_split_options( opt_string, optlist );
1013+
1014+        if( opts )
1015+        {
1016+            char *str_bit_depth = x264_get_option( "bit_depth", opts );
1017+            bit_depth = x264_otoi( str_bit_depth, -1 );
1018+
1019+            ret = bit_depth < 8 || bit_depth > 16;
1020+            csp = bit_depth > 8 ? csp | X264_CSP_HIGH_DEPTH : csp & ~X264_CSP_HIGH_DEPTH;
1021+            change_fmt = (info->csp ^ csp) & X264_CSP_HIGH_DEPTH;
1022+            x264_free_string_array( opts );
1023+        }
1024+        else
1025+            ret = 1;
1026+    }
1027+
1028+    FAIL_IF_ERROR( bit_depth != BIT_DEPTH, "this build supports only bit depth %d\n", BIT_DEPTH )
1029+    FAIL_IF_ERROR( ret, "unsupported bit depth conversion.\n" )
1030+
1031+    /* only add the filter to the chain if it's needed */
1032+    if( change_fmt || bit_depth != 8 * x264_cli_csp_depth_factor( csp ) )
1033+    {
1034+        FAIL_IF_ERROR( !depth_filter_csp_is_supported(csp), "unsupported colorspace.\n" )
1035+        depth_hnd_t *h = x264_malloc( sizeof(depth_hnd_t) + (info->width+1)*sizeof(int16_t) );
1036+
1037+        if( !h )
1038+            return -1;
1039+
1040+        h->error_buf = (int16_t*)(h + 1);
1041+        h->dst_csp = csp;
1042+        h->bit_depth = bit_depth;
1043+        h->prev_hnd = *handle;
1044+        h->prev_filter = *filter;
1045+
1046+        if( x264_cli_pic_alloc( &h->buffer, h->dst_csp, info->width, info->height ) )
1047+        {
1048+            x264_free( h );
1049+            return -1;
1050+        }
1051+
1052+        *handle = h;
1053+        *filter = depth_filter;
1054+        info->csp = h->dst_csp;
1055+    }
1056+
1057+    return 0;
1058+}
1059+
1060+cli_vid_filter_t depth_filter = { NAME, NULL, init, get_frame, release_frame, free_filter, NULL };
1061diff --git a/filters/video/internal.c b/filters/video/internal.c
1062index 444ea1f..ef096dc 100644
1063--- a/filters/video/internal.c
1064+++ b/filters/video/internal.c
1065@@ -51,6 +51,7 @@ int x264_cli_pic_copy( cli_pic_t *out, cli_pic_t *in )
1066     {
1067         int height = in->img.height * x264_cli_csps[csp].height[i];
1068         int width =  in->img.width  * x264_cli_csps[csp].width[i];
1069+        width *= x264_cli_csp_depth_factor( in->img.csp );
1070         x264_cli_plane_copy( out->img.plane[i], out->img.stride[i], in->img.plane[i],
1071                              in->img.stride[i], width, height );
1072     }
1073diff --git a/filters/video/resize.c b/filters/video/resize.c
1074index 38077b2..04b5e73 100644
1075--- a/filters/video/resize.c
1076+++ b/filters/video/resize.c
1077@@ -79,10 +79,21 @@ static void help( int longhelp )
1078             "            - fittobox: resizes the video based on the desired contraints\n"
1079             "               - width, height, both\n"
1080             "            - fittobox and sar: same as above except with specified sar\n"
1081-            "            simultaneously converting to the given colorspace\n"
1082-            "            using resizer method [\"bicubic\"]\n"
1083-            "             - fastbilinear, bilinear, bicubic, experimental, point,\n"
1084-            "             - area, bicublin, gauss, sinc, lanczos, spline\n" );
1085+            "            - csp: convert to the given csp. syntax: [name][:depth]\n"
1086+            "               - valid csp names [keep current]: " );
1087+
1088+    for( int i = X264_CSP_NONE+1; i < X264_CSP_CLI_MAX; i++ )
1089+    {
1090+        printf( "%s", x264_cli_csps[i].name );
1091+        if( i+1 < X264_CSP_CLI_MAX )
1092+            printf( ", " );
1093+    }
1094+    printf( "\n"
1095+            "               - depth: 8 or 16 bits per pixel [keep current]\n"
1096+            "            note: not all depths are supported by all csps.\n"
1097+            "            - method: use resizer method [\"bicubic\"]\n"
1098+            "               - fastbilinear, bilinear, bicubic, experimental, point,\n"
1099+            "               - area, bicublin, gauss, sinc, lanczos, spline\n" );
1100 }
1101 
1102 static uint32_t convert_cpu_to_flag( uint32_t cpu )
1103@@ -131,13 +142,15 @@ static int convert_csp_to_pix_fmt( int csp )
1104         return csp&X264_CSP_MASK;
1105     switch( csp&X264_CSP_MASK )
1106     {
1107-        case X264_CSP_I420: return PIX_FMT_YUV420P;
1108-        case X264_CSP_I422: return PIX_FMT_YUV422P;
1109-        case X264_CSP_I444: return PIX_FMT_YUV444P;
1110-        case X264_CSP_NV12: return PIX_FMT_NV12;
1111-        case X264_CSP_YV12: return PIX_FMT_YUV420P; /* specially handled via swapping chroma */
1112-        case X264_CSP_BGR:  return PIX_FMT_BGR24;
1113-        case X264_CSP_BGRA: return PIX_FMT_BGRA;
1114+        case X264_CSP_YV12: /* specially handled via swapping chroma */
1115+        case X264_CSP_I420: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV420P16 : PIX_FMT_YUV420P;
1116+        case X264_CSP_I422: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV422P16 : PIX_FMT_YUV422P;
1117+        case X264_CSP_I444: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_YUV444P16 : PIX_FMT_YUV444P;
1118+        case X264_CSP_RGB:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_RGB48     : PIX_FMT_RGB24;
1119+        /* the next 3 csps have no equivalent 16bit depth in swscale */
1120+        case X264_CSP_NV12: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_NV12;
1121+        case X264_CSP_BGR:  return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGR24;
1122+        case X264_CSP_BGRA: return csp&X264_CSP_HIGH_DEPTH ? PIX_FMT_NONE      : PIX_FMT_BGRA;
1123         default:            return PIX_FMT_NONE;
1124     }
1125 }
1126@@ -147,23 +160,30 @@ static int pick_closest_supported_csp( int csp )
1127     int pix_fmt = convert_csp_to_pix_fmt( csp );
1128     switch( pix_fmt )
1129     {
1130+        case PIX_FMT_YUV420P16LE:
1131+        case PIX_FMT_YUV420P16BE:
1132+            return X264_CSP_I420 | X264_CSP_HIGH_DEPTH;
1133         case PIX_FMT_YUV422P:
1134-        case PIX_FMT_YUV422P16LE:
1135-        case PIX_FMT_YUV422P16BE:
1136         case PIX_FMT_YUYV422:
1137         case PIX_FMT_UYVY422:
1138             return X264_CSP_I422;
1139+        case PIX_FMT_YUV422P16LE:
1140+        case PIX_FMT_YUV422P16BE:
1141+            return X264_CSP_I422 | X264_CSP_HIGH_DEPTH;
1142         case PIX_FMT_YUV444P:
1143+            return X264_CSP_I444;
1144         case PIX_FMT_YUV444P16LE:
1145         case PIX_FMT_YUV444P16BE:
1146-            return X264_CSP_I444;
1147-        case PIX_FMT_RGB24:    // convert rgb to bgr
1148-        case PIX_FMT_RGB48BE:
1149-        case PIX_FMT_RGB48LE:
1150+            return X264_CSP_I444 | X264_CSP_HIGH_DEPTH;
1151+        case PIX_FMT_RGB24:
1152         case PIX_FMT_RGB565BE:
1153         case PIX_FMT_RGB565LE:
1154         case PIX_FMT_RGB555BE:
1155         case PIX_FMT_RGB555LE:
1156+            return X264_CSP_RGB;
1157+        case PIX_FMT_RGB48BE:
1158+        case PIX_FMT_RGB48LE:
1159+            return X264_CSP_RGB | X264_CSP_HIGH_DEPTH;
1160         case PIX_FMT_BGR24:
1161         case PIX_FMT_BGR565BE:
1162         case PIX_FMT_BGR565LE:
1163@@ -209,12 +229,27 @@ static int handle_opts( const char **optlist, char **opts, video_info_t *info, r
1164 
1165     if( str_csp )
1166     {
1167-        /* output csp was specified, lookup against valid values */
1168+        /* output csp was specified, first check if optional depth was provided */
1169+        char *str_depth = strchr( str_csp, ':' );
1170+        int depth = x264_cli_csp_depth_factor( info->csp ) * 8;
1171+        if( str_depth )
1172+        {
1173+            /* csp bit depth was specified */
1174+            *str_depth++ = '\0';
1175+            depth = x264_otoi( str_depth, -1 );
1176+            FAIL_IF_ERROR( depth != 8 && depth != 16, "unsupported bit depth %d\n", depth );
1177+        }
1178+        /* now lookup against the list of valid csps */
1179         int csp;
1180-        for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
1181-            csp--;
1182+        if( strlen( str_csp ) == 0 )
1183+            csp = info->csp & X264_CSP_MASK;
1184+        else
1185+            for( csp = X264_CSP_CLI_MAX-1; x264_cli_csps[csp].name && strcasecmp( x264_cli_csps[csp].name, str_csp ); )
1186+                csp--;
1187         FAIL_IF_ERROR( csp == X264_CSP_NONE, "unsupported colorspace `%s'\n", str_csp );
1188         h->dst_csp = csp;
1189+        if( depth == 16 )
1190+            h->dst_csp |= X264_CSP_HIGH_DEPTH;
1191     }
1192 
1193     /* if the input sar is currently invalid, set it to 1:1 so it can be used in math */
1194@@ -366,8 +401,17 @@ static int init( hnd_t *handle, cli_vid_filter_t *filter, video_info_t *info, x2
1195     h->swap_chroma = (info->csp & X264_CSP_MASK) == X264_CSP_YV12;
1196     int src_pix_fmt = convert_csp_to_pix_fmt( info->csp );
1197 
1198+    int src_pix_fmt_inv = convert_csp_to_pix_fmt( info->csp ^ X264_CSP_HIGH_DEPTH );
1199+    int dst_pix_fmt_inv = convert_csp_to_pix_fmt( h->dst_csp ^ X264_CSP_HIGH_DEPTH );
1200+
1201     /* confirm swscale can support this conversion */
1202+    FAIL_IF_ERROR( src_pix_fmt == PIX_FMT_NONE && src_pix_fmt_inv != PIX_FMT_NONE,
1203+                   "input colorspace %s with bit depth %d is not supported\n", sws_format_name( src_pix_fmt_inv ),
1204+                   info->csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
1205     FAIL_IF_ERROR( !sws_isSupportedInput( src_pix_fmt ), "input colorspace %s is not supported\n", sws_format_name( src_pix_fmt ) )
1206+    FAIL_IF_ERROR( h->dst.pix_fmt == PIX_FMT_NONE && dst_pix_fmt_inv != PIX_FMT_NONE,
1207+                   "output colorspace %s with bit depth %d is not supported\n", sws_format_name( dst_pix_fmt_inv ),
1208+                   h->dst_csp & X264_CSP_HIGH_DEPTH ? 16 : 8 );
1209     FAIL_IF_ERROR( !sws_isSupportedOutput( h->dst.pix_fmt ), "output colorspace %s is not supported\n", sws_format_name( h->dst.pix_fmt ) )
1210     FAIL_IF_ERROR( h->dst.height != info->height && info->interlaced,
1211                    "swscale is not compatible with interlaced vertical resizing\n" )
1212diff --git a/filters/video/video.c b/filters/video/video.c
1213index 61dc8c6..71ae01e 100644
1214--- a/filters/video/video.c
1215+++ b/filters/video/video.c
1216@@ -51,6 +51,7 @@ void x264_register_vid_filters()
1217     REGISTER_VFILTER( fix_vfr_pts );
1218     REGISTER_VFILTER( resize );
1219     REGISTER_VFILTER( select_every );
1220+    REGISTER_VFILTER( depth );
1221 #if HAVE_GPL
1222 #endif
1223 }
1224diff --git a/input/input.c b/input/input.c
1225index 78c7a88..a14bd3c 100644
1226--- a/input/input.c
1227+++ b/input/input.c
1228@@ -32,7 +32,8 @@ const x264_cli_csp_t x264_cli_csps[] = {
1229     [X264_CSP_YV12] = { "yv12", 3, { 1, .5, .5 }, { 1, .5, .5 }, 2, 2 },
1230     [X264_CSP_NV12] = { "nv12", 2, { 1,  1 },     { 1, .5 },     2, 2 },
1231     [X264_CSP_BGR]  = { "bgr",  1, { 3 },         { 1 },         1, 1 },
1232-    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 }
1233+    [X264_CSP_BGRA] = { "bgra", 1, { 4 },         { 1 },         1, 1 },
1234+    [X264_CSP_RGB]  = { "rgb",  1, { 3 },         { 1 },         1, 1 },
1235 };
1236 
1237 int x264_cli_csp_is_invalid( int csp )
1238@@ -41,6 +42,13 @@ int x264_cli_csp_is_invalid( int csp )
1239     return csp_mask <= X264_CSP_NONE || csp_mask >= X264_CSP_CLI_MAX || csp & X264_CSP_OTHER;
1240 }
1241 
1242+int x264_cli_csp_depth_factor( int csp )
1243+{
1244+    if( x264_cli_csp_is_invalid( csp ) )
1245+        return 0;
1246+    return (csp & X264_CSP_HIGH_DEPTH) ? 2 : 1;
1247+}
1248+
1249 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
1250 {
1251     int csp_mask = csp & X264_CSP_MASK;
1252@@ -48,6 +56,7 @@ uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane )
1253         return 0;
1254     uint64_t size = (uint64_t)width * height;
1255     size *= x264_cli_csps[csp_mask].width[plane] * x264_cli_csps[csp_mask].height[plane];
1256+    size *= x264_cli_csp_depth_factor( csp );
1257     return size;
1258 }
1259 
1260@@ -78,7 +87,7 @@ int x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height )
1261          pic->img.plane[i] = x264_malloc( x264_cli_pic_plane_size( csp, width, height, i ) );
1262          if( !pic->img.plane[i] )
1263              return -1;
1264-         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i];
1265+         pic->img.stride[i] = width * x264_cli_csps[csp_mask].width[i] * x264_cli_csp_depth_factor( csp );
1266     }
1267 
1268     return 0;
1269diff --git a/input/input.h b/input/input.h
1270index 972dd9c..43826d7 100644
1271--- a/input/input.h
1272+++ b/input/input.h
1273@@ -36,6 +36,7 @@ typedef struct
1274     char *index_file;
1275     char *resolution;
1276     char *colorspace;
1277+    int bit_depth;
1278     char *timebase;
1279     int seek;
1280 } cli_input_opt_t;
1281@@ -103,8 +104,9 @@ extern cli_input_t input;
1282 #define X264_CSP_I444          (X264_CSP_MAX+1)  /* yuv 4:4:4 planar    */
1283 #define X264_CSP_BGR           (X264_CSP_MAX+2)  /* packed bgr 24bits   */
1284 #define X264_CSP_BGRA          (X264_CSP_MAX+3)  /* packed bgr 32bits   */
1285-#define X264_CSP_CLI_MAX       (X264_CSP_MAX+4)  /* end of list         */
1286-#define X264_CSP_OTHER          0x2000           /* non x264 colorspace */
1287+#define X264_CSP_RGB           (X264_CSP_MAX+4)  /* packed rgb 24bits   */
1288+#define X264_CSP_CLI_MAX       (X264_CSP_MAX+5)  /* end of list         */
1289+#define X264_CSP_OTHER          0x4000           /* non x264 colorspace */
1290 
1291 typedef struct
1292 {
1293@@ -119,6 +121,7 @@ typedef struct
1294 extern const x264_cli_csp_t x264_cli_csps[];
1295 
1296 int      x264_cli_csp_is_invalid( int csp );
1297+int      x264_cli_csp_depth_factor( int csp );
1298 int      x264_cli_pic_alloc( cli_pic_t *pic, int csp, int width, int height );
1299 void     x264_cli_pic_clean( cli_pic_t *pic );
1300 uint64_t x264_cli_pic_plane_size( int csp, int width, int height, int plane );
1301diff --git a/input/raw.c b/input/raw.c
1302index f5fbed6..9617fb1 100644
1303--- a/input/raw.c
1304+++ b/input/raw.c
1305@@ -34,11 +34,12 @@ typedef struct
1306     int next_frame;
1307     uint64_t plane_size[4];
1308     uint64_t frame_size;
1309+    int bit_depth;
1310 } raw_hnd_t;
1311 
1312 static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, cli_input_opt_t *opt )
1313 {
1314-    raw_hnd_t *h = malloc( sizeof(raw_hnd_t) );
1315+    raw_hnd_t *h = calloc( 1, sizeof(raw_hnd_t) );
1316     if( !h )
1317         return -1;
1318 
1319@@ -61,8 +62,10 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1320     else /* default */
1321         info->csp = X264_CSP_I420;
1322 
1323-    h->next_frame = 0;
1324-    info->vfr     = 0;
1325+    h->bit_depth = opt->bit_depth;
1326+    FAIL_IF_ERROR( h->bit_depth < 8 || h->bit_depth > 16, "unsupported bit depth `%d'\n", h->bit_depth );
1327+    if( h->bit_depth > 8 )
1328+        info->csp |= X264_CSP_HIGH_DEPTH;
1329 
1330     if( !strcmp( psz_filename, "-" ) )
1331         h->fh = stdin;
1332@@ -73,11 +76,15 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1333 
1334     info->thread_safe = 1;
1335     info->num_frames  = 0;
1336-    h->frame_size = 0;
1337-    for( int i = 0; i < x264_cli_csps[info->csp].planes; i++ )
1338+    info->vfr         = 0;
1339+
1340+    const x264_cli_csp_t *csp = x264_cli_get_csp( info->csp );
1341+    for( int i = 0; i < csp->planes; i++ )
1342     {
1343         h->plane_size[i] = x264_cli_pic_plane_size( info->csp, info->width, info->height, i );
1344         h->frame_size += h->plane_size[i];
1345+        /* x264_cli_pic_plane_size returns the size in bytes, we need the value in pixels from here on */
1346+        h->plane_size[i] /= x264_cli_csp_depth_factor( info->csp );
1347     }
1348 
1349     if( x264_is_regular_file( h->fh ) )
1350@@ -95,8 +102,22 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1351 static int read_frame_internal( cli_pic_t *pic, raw_hnd_t *h )
1352 {
1353     int error = 0;
1354+    int pixel_depth = x264_cli_csp_depth_factor( pic->img.csp );
1355     for( int i = 0; i < pic->img.planes && !error; i++ )
1356-        error |= fread( pic->img.plane[i], h->plane_size[i], 1, h->fh ) <= 0;
1357+    {
1358+        error |= fread( pic->img.plane[i], pixel_depth, h->plane_size[i], h->fh ) != h->plane_size[i];
1359+        if( h->bit_depth & 7 )
1360+        {
1361+            /* upconvert non 16bit high depth planes to 16bit using the same
1362+             * algorithm as used in the depth filter. */
1363+            uint16_t *plane = (uint16_t*)pic->img.plane[i];
1364+            uint64_t pixel_count = h->plane_size[i];
1365+            int lshift = 16 - h->bit_depth;
1366+            int rshift = 2*h->bit_depth - 16;
1367+            for( uint64_t j = 0; j < pixel_count; j++ )
1368+                plane[j] = (plane[j] << lshift) + (plane[j] >> rshift);
1369+        }
1370+    }
1371     return error;
1372 }
1373 
1374diff --git a/x264.c b/x264.c
1375index 5bd2af7..415181c 100644
1376--- a/x264.c
1377+++ b/x264.c
1378@@ -214,7 +214,7 @@ static void print_version_info()
1379 #else
1380     printf( "using a non-gcc compiler\n" );
1381 #endif
1382-    printf( "configuration: --bit-depth=%d\n", BIT_DEPTH );
1383+    printf( "configuration: --bit-depth=%d\n", x264_bit_depth );
1384     printf( "x264 license: " );
1385 #if HAVE_GPL
1386     printf( "GPL version 2 or later\n" );
1387@@ -375,7 +375,7 @@ static void Help( x264_param_t *defaults, int longhelp )
1388 #else
1389         "no",
1390 #endif
1391-        BIT_DEPTH
1392+        x264_bit_depth
1393       );
1394     H0( "Example usage:\n" );
1395     H0( "\n" );
1396@@ -697,6 +697,7 @@ static void Help( x264_param_t *defaults, int longhelp )
1397         "                                  - %s\n", demuxer_names[0], stringify_names( buf, demuxer_names ) );
1398     H1( "      --input-csp <string>    Specify input colorspace format for raw input\n" );
1399     print_csp_names( longhelp );
1400+    H1( "      --input-depth <integer> Specify input bit depth for raw input\n" );
1401     H1( "      --input-res <intxint>   Specify input resolution (width x height)\n" );
1402     H1( "      --index <string>        Filename for input index file\n" );
1403     H0( "      --sar width:height      Specify Sample Aspect Ratio\n" );
1404@@ -769,7 +770,8 @@ enum {
1405     OPT_LOG_LEVEL,
1406     OPT_VIDEO_FILTER,
1407     OPT_INPUT_RES,
1408-    OPT_INPUT_CSP
1409+    OPT_INPUT_CSP,
1410+    OPT_INPUT_DEPTH
1411 } OptionsOPT;
1412 
1413 static char short_options[] = "8A:B:b:f:hI:i:m:o:p:q:r:t:Vvw";
1414@@ -921,6 +923,7 @@ static struct option long_options[] =
1415     { "video-filter", required_argument, NULL, OPT_VIDEO_FILTER },
1416     { "input-res",   required_argument, NULL, OPT_INPUT_RES },
1417     { "input-csp",   required_argument, NULL, OPT_INPUT_CSP },
1418+    { "input-depth", required_argument, NULL, OPT_INPUT_DEPTH },
1419     {0, 0, 0, 0}
1420 };
1421 
1422@@ -1082,10 +1085,16 @@ static int init_vid_filters( char *sequence, hnd_t *handle, video_info_t *info,
1423     if( csp > X264_CSP_NONE && csp < X264_CSP_MAX )
1424         param->i_csp = info->csp;
1425     else
1426-        param->i_csp = X264_CSP_I420;
1427+        param->i_csp = X264_CSP_I420 | ( info->csp & X264_CSP_HIGH_DEPTH );
1428     if( x264_init_vid_filter( "resize", handle, &filter, info, param, NULL ) )
1429         return -1;
1430 
1431+    char args[20];
1432+    sprintf( args, "bit_depth=%d", x264_bit_depth );
1433+
1434+    if( x264_init_vid_filter( "depth", handle, &filter, info, param, args ) )
1435+        return -1;
1436+
1437     return 0;
1438 }
1439 
1440@@ -1138,6 +1147,7 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1441 
1442     memset( opt, 0, sizeof(cli_opt_t) );
1443     memset( &input_opt, 0, sizeof(cli_input_opt_t) );
1444+    input_opt.bit_depth = 8;
1445     opt->b_progress = 1;
1446 
1447     /* Presets are applied before all other options. */
1448@@ -1283,6 +1293,9 @@ static int Parse( int argc, char **argv, x264_param_t *param, cli_opt_t *opt )
1449             case OPT_INPUT_CSP:
1450                 input_opt.colorspace = optarg;
1451                 break;
1452+            case OPT_INPUT_DEPTH:
1453+                input_opt.bit_depth = atoi( optarg );
1454+                break;
1455             default:
1456 generic_option:
1457             {
1458diff --git a/x264.h b/x264.h
1459index 56d424c..c9b182a 100644
1460--- a/x264.h
1461+++ b/x264.h
1462@@ -180,7 +180,8 @@ static const char * const x264_open_gop_names[] = { "none", "normal", "bluray",
1463 #define X264_CSP_YV12           0x0002  /* yvu 4:2:0 planar */
1464 #define X264_CSP_NV12           0x0003  /* yuv 4:2:0, with one y plane and one packed u+v */
1465 #define X264_CSP_MAX            0x0004  /* end of list */
1466-#define X264_CSP_VFLIP          0x1000  /* */
1467+#define X264_CSP_VFLIP          0x1000  /* the csp is vertically flipped */
1468+#define X264_CSP_HIGH_DEPTH     0x2000  /* the csp has a depth of 16 bits per pixel component */
1469 
1470 /* Slice type */
1471 #define X264_TYPE_AUTO          0x0000  /* Let x264 choose the right type */
1472@@ -342,7 +343,7 @@ typedef struct x264_param_t
1473     {
1474         int         i_rc_method;    /* X264_RC_* */
1475 
1476-        int         i_qp_constant;  /* 0 to (51 + 6*(BIT_DEPTH-8)) */
1477+        int         i_qp_constant;  /* 0 to (51 + 6*(x264_bit_depth-8)) */
1478         int         i_qp_min;       /* min allowed QP value */
1479         int         i_qp_max;       /* max allowed QP value */
1480         int         i_qp_step;      /* max QP step between frames */
1481@@ -566,6 +567,15 @@ int     x264_param_apply_profile( x264_param_t *, const char *profile );
1482  * Picture structures and functions
1483  ****************************************************************************/
1484 
1485+/* x264_bit_depth:
1486+ *      Specifies the number of bits per pixel sample that x264 uses. This is also the
1487+ *      bit depth that x264 encodes in. If this value is > 8, x264 will read
1488+ *      two bytes of input data for each pixel sample, and expect the upper
1489+ *      (16-x264_bit_depth) bits to be zero.
1490+ *      Note: The flag X264_CSP_HIGH_DEPTH must be used to specify the
1491+ *      colorspace depth as well. */
1492+extern const int x264_bit_depth;
1493+
1494 enum pic_struct_e
1495 {
1496     PIC_STRUCT_AUTO              = 0, // automatically decide (default)
1497-- 
14981.7.2.3
1499
1500
1501From 195cf9bd51203eb18cdff5542b27caf635e7b1cf Mon Sep 17 00:00:00 2001
1502From: Jason Garrett-Glaser <darkshikari@gmail.com>
1503Date: Sun, 26 Sep 2010 21:04:30 -0700
1504Subject: [PATCH 8/8] Add High 10 Intra profile support (AVC-Intra)
1505 x264 should now be able to encode compliant AVC-Intra 50.
1506 With a 10-bit-compiled version of x264, a sample command line for 1080i25 might be:
1507 --interlaced --keyint 1 --vbv-bufsize 2000 --bitrate 50000 --vbv-maxrate 50000 --nal-hrd cbr
1508
1509Also print "Constrained Baseline" for baseline profile, since that's all x264 (and everything else in the world) supports.
1510Also reorganize parameter validation a bit to reduce some spurious warnings.
1511---
1512 encoder/encoder.c |   14 +++++++++-----
1513 encoder/set.c     |   15 ++++++++++-----
1514 2 files changed, 19 insertions(+), 10 deletions(-)
1515
1516diff --git a/encoder/encoder.c b/encoder/encoder.c
1517index 62a4350..a1e8383 100644
1518--- a/encoder/encoder.c
1519+++ b/encoder/encoder.c
1520@@ -439,6 +439,13 @@ static int x264_validate_parameters( x264_t *h )
1521     if( h->i_thread_frames > 1 )
1522         h->param.nalu_process = NULL;
1523 
1524+    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
1525+    if( h->param.i_keyint_max == 1 )
1526+    {
1527+        h->param.b_intra_refresh = 0;
1528+        h->param.analyse.i_weighted_pred = 0;
1529+    }
1530+
1531     if( h->param.b_interlaced )
1532     {
1533         if( h->param.analyse.i_me_method >= X264_ME_ESA )
1534@@ -576,7 +583,6 @@ static int x264_validate_parameters( x264_t *h )
1535 
1536     h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, X264_REF_MAX );
1537     h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, X264_REF_MAX );
1538-    h->param.i_keyint_max = x264_clip3( h->param.i_keyint_max, 1, X264_KEYINT_MAX_INFINITE );
1539     if( h->param.i_scenecut_threshold < 0 )
1540         h->param.i_scenecut_threshold = 0;
1541     if( !h->param.analyse.i_subpel_refine && h->param.analyse.i_direct_mv_pred > X264_DIRECT_PRED_SPATIAL )
1542@@ -586,8 +592,6 @@ static int x264_validate_parameters( x264_t *h )
1543     }
1544     h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
1545     h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_BLURAY );
1546-    if( h->param.i_keyint_max == 1 )
1547-        h->param.b_intra_refresh = 0;
1548     h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
1549     if( h->param.i_bframe <= 1 )
1550         h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
1551@@ -1155,10 +1159,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
1552         fclose( f );
1553     }
1554 
1555-    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Baseline" :
1556+    const char *profile = h->sps->i_profile_idc == PROFILE_BASELINE ? "Constrained Baseline" :
1557                           h->sps->i_profile_idc == PROFILE_MAIN ? "Main" :
1558                           h->sps->i_profile_idc == PROFILE_HIGH ? "High" :
1559-                          h->sps->i_profile_idc == PROFILE_HIGH10 ? "High 10" :
1560+                          h->sps->i_profile_idc == PROFILE_HIGH10 ? (h->sps->b_constraint_set3 == 1 ? "High 10 Intra" : "High 10") :
1561                           "High 4:4:4 Predictive";
1562     char level[4];
1563     snprintf( level, sizeof(level), "%d.%d", h->sps->i_level_idc/10, h->sps->i_level_idc%10 );
1564diff --git a/encoder/set.c b/encoder/set.c
1565index a003012..0a24bf7 100644
1566--- a/encoder/set.c
1567+++ b/encoder/set.c
1568@@ -121,17 +121,17 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
1569     sps->b_constraint_set1  = sps->i_profile_idc <= PROFILE_MAIN;
1570     /* Never set constraint_set2, it is not necessary and not used in real world. */
1571     sps->b_constraint_set2  = 0;
1572+    sps->b_constraint_set3  = 0;
1573 
1574+    sps->i_level_idc = param->i_level_idc;
1575     if( param->i_level_idc == 9 && ( sps->i_profile_idc >= PROFILE_BASELINE && sps->i_profile_idc <= PROFILE_EXTENDED ) )
1576     {
1577         sps->b_constraint_set3 = 1; /* level 1b with Baseline, Main or Extended profile is signalled via constraint_set3 */
1578         sps->i_level_idc      = 11;
1579     }
1580-    else
1581-    {
1582-        sps->b_constraint_set3 = 0;
1583-        sps->i_level_idc = param->i_level_idc;
1584-    }
1585+    /* High 10 Intra profile */
1586+    if( param->i_keyint_max == 1 && sps->i_profile_idc == PROFILE_HIGH10 )
1587+        sps->b_constraint_set3 = 1;
1588 
1589     sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
1590     /* extra slot with pyramid so that we don't have to override the
1591@@ -140,6 +140,11 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
1592     sps->i_num_ref_frames = X264_MIN(X264_REF_MAX, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
1593                             param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
1594     sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
1595+    if( param->i_keyint_max == 1 )
1596+    {
1597+        sps->i_num_ref_frames = 0;
1598+        sps->vui.i_max_dec_frame_buffering = 0;
1599+    }
1600 
1601     /* number of refs + current frame */
1602     int max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
1603-- 
16041.7.2.3