· 8 years ago · Jun 01, 2017, 06:40 PM
1From 4fa7d539c1a09a0779194c7971495e966ff7de1e Mon Sep 17 00:00:00 2001
2From: Jason Garrett-Glaser <darkshikari@gmail.com>
3Date: Tue, 15 Jun 2010 05:15:42 -0700
4Subject: [PATCH 01/10] Fix compilation on ARM w/ Apple ABI
5
6---
7 encoder/me.c | 2 +-
8 1 files changed, 1 insertions(+), 1 deletions(-)
9
10diff --git a/encoder/me.c b/encoder/me.c
11index 2914eb3..291104a 100644
12--- a/encoder/me.c
13+++ b/encoder/me.c
14@@ -245,7 +245,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
15 pmv = pack16to32_mask( bmx, bmy );
16 if( i_mvc > 0 )
17 {
18- ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
19+ ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
20 x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
21 bcost <<= 4;
22 for( int i = 1; i <= i_mvc; i++ )
23--
241.7.0.4
25
26
27From 69e9d85c292cb9daa96664657352bf6c65af5825 Mon Sep 17 00:00:00 2001
28From: Anton Mitrofanov <BugMaster@narod.ru>
29Date: Sat, 19 Jun 2010 01:44:56 +0400
30Subject: [PATCH 02/10] Fix SIGPIPEs caused by is_regular_file checks
31 Check to see if input file is a pipe without opening it.
32
33---
34 common/osdep.h | 10 +++++++++-
35 x264.c | 1 +
36 2 files changed, 10 insertions(+), 1 deletions(-)
37
38diff --git a/common/osdep.h b/common/osdep.h
39index b1b357c..b3a8cd6 100644
40--- a/common/osdep.h
41+++ b/common/osdep.h
42@@ -290,7 +290,15 @@ static inline uint8_t x264_is_regular_file( FILE *filehandle )
43 {
44 struct stat file_stat;
45 if( fstat( fileno( filehandle ), &file_stat ) )
46- return 0;
47+ return -1;
48+ return S_ISREG( file_stat.st_mode );
49+}
50+
51+static inline uint8_t x264_is_regular_file_path( const char *filename )
52+{
53+ struct stat file_stat;
54+ if( stat( filename, &file_stat ) )
55+ return -1;
56 return S_ISREG( file_stat.st_mode );
57 }
58
59diff --git a/x264.c b/x264.c
60index a124083..09bad61 100644
61--- a/x264.c
62+++ b/x264.c
63@@ -806,6 +806,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
64 int b_auto = !strcasecmp( demuxer, "auto" );
65 if( !b_regular && b_auto )
66 ext = "yuv";
67+ b_regular = b_regular && x264_is_regular_file_path( filename );
68 if( b_regular )
69 {
70 FILE *f = fopen( filename, "r" );
71--
721.7.0.4
73
74
75From 30496668b92548939e8af265f65477bc014d2f22 Mon Sep 17 00:00:00 2001
76From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
77Date: Fri, 18 Jun 2010 14:57:52 -0700
78Subject: [PATCH 03/10] Properly close qpfile on errors
79
80---
81 encoder/encoder.c | 1 +
82 input/avs.c | 1 +
83 output/mp4.c | 1 +
84 x264.c | 6 ++++++
85 4 files changed, 9 insertions(+), 0 deletions(-)
86
87diff --git a/encoder/encoder.c b/encoder/encoder.c
88index 08a28bd..0c02d74 100644
89--- a/encoder/encoder.c
90+++ b/encoder/encoder.c
91@@ -1109,6 +1109,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
92 else if( !x264_is_regular_file( f ) )
93 {
94 x264_log( h, X264_LOG_ERROR, "dump_yuv: incompatible with non-regular file %s\n", h->param.psz_dump_yuv );
95+ fclose( f );
96 goto fail;
97 }
98 fclose( f );
99diff --git a/input/avs.c b/input/avs.c
100index 07add40..849c465 100644
101--- a/input/avs.c
102+++ b/input/avs.c
103@@ -134,6 +134,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
104 else if( !x264_is_regular_file( fh ) )
105 {
106 fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
107+ fclose( fh );
108 return -1;
109 }
110 fclose( fh );
111diff --git a/output/mp4.c b/output/mp4.c
112index 0e3c2fc..9b35a2f 100644
113--- a/output/mp4.c
114+++ b/output/mp4.c
115@@ -166,6 +166,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
116 else if( !x264_is_regular_file( fh ) )
117 {
118 fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
119+ fclose( fh );
120 return -1;
121 }
122 fclose( fh );
123diff --git a/x264.c b/x264.c
124index 09bad61..f1b55d4 100644
125--- a/x264.c
126+++ b/x264.c
127@@ -1587,6 +1587,12 @@ static int Encode( x264_param_t *param, cli_opt_t *opt )
128 if( b_ctrl_c )
129 fprintf( stderr, "aborted at input frame %d, output frame %d\n", opt->i_seek + i_frame, i_frame_output );
130
131+ if( opt->qpfile )
132+ {
133+ fclose( opt->qpfile );
134+ opt->qpfile = NULL;
135+ }
136+
137 if( opt->tcfile_out )
138 {
139 fclose( opt->tcfile_out );
140--
1411.7.0.4
142
143
144From 2f77b57feeec45605f517657fa7e2ccb5ce62e35 Mon Sep 17 00:00:00 2001
145From: Jason Garrett-Glaser <darkshikari@gmail.com>
146Date: Tue, 22 Jun 2010 14:20:46 -0700
147Subject: [PATCH 04/10] Use -fno-tree-vectorize to avoid miscompilation
148 Some versions of gcc have been reported to attempt (and fail) to vectorize a loop in plane_expand_border.
149 This results in a segfault, so to limit the possible effects of gcc's utter incompetence, we're turning off vectorization entirely. It's not like it ever did anything useful to begin with.
150
151---
152 configure | 4 ++++
153 1 files changed, 4 insertions(+), 0 deletions(-)
154
155diff --git a/configure b/configure
156index 3a38cb5..24d15ad 100755
157--- a/configure
158+++ b/configure
159@@ -628,6 +628,10 @@ else
160 CFLAGS="-O3 -ffast-math $CFLAGS"
161 fi
162
163+if cc_check '' -fno-tree-vectorize ; then
164+ CFLAGS="$CFLAGS -fno-tree-vectorize"
165+fi
166+
167 if cc_check "stdio.h" "" "fseeko(stdin,0,0);" ; then
168 define fseek fseeko
169 define ftell ftello
170--
1711.7.0.4
172
173
174From c9d85577006b956d0f96ca716aef56097ab409fa Mon Sep 17 00:00:00 2001
175From: Jason Garrett-Glaser <darkshikari@gmail.com>
176Date: Sat, 19 Jun 2010 03:27:33 -0700
177Subject: [PATCH 05/10] Improve HRD accuracy
178 In a staggering display of brain damage, the spec requires all HRD math to be done in infinite precision despite the output being of quite limited precision.
179 Accordingly, convert buffer management to work in units of timescale.
180 These accumulating rounding errors probably didn't cause any real problems, but might in theory cause issues in very picky muxers on extremely long-running streams.
181
182---
183 common/common.c | 36 ++++++++++++++-----------
184 common/common.h | 1 +
185 encoder/encoder.c | 3 +-
186 encoder/ratecontrol.c | 67 ++++++++++++++++++++++++++++++++-----------------
187 encoder/ratecontrol.h | 2 +-
188 5 files changed, 67 insertions(+), 42 deletions(-)
189
190diff --git a/common/common.c b/common/common.c
191index 4fa5e4b..cb1aa9c 100644
192--- a/common/common.c
193+++ b/common/common.c
194@@ -1080,24 +1080,28 @@ void x264_free( void *p )
195 /****************************************************************************
196 * x264_reduce_fraction:
197 ****************************************************************************/
198-void x264_reduce_fraction( uint32_t *n, uint32_t *d )
199-{
200- uint32_t a = *n;
201- uint32_t b = *d;
202- uint32_t c;
203- if( !a || !b )
204- return;
205- c = a % b;
206- while(c)
207- {
208- a = b;
209- b = c;
210- c = a % b;
211- }
212- *n /= b;
213- *d /= b;
214+#define REDUCE_FRACTION( name, type )\
215+void name( type *n, type *d )\
216+{ \
217+ type a = *n; \
218+ type b = *d; \
219+ type c; \
220+ if( !a || !b ) \
221+ return; \
222+ c = a % b; \
223+ while( c ) \
224+ { \
225+ a = b; \
226+ b = c; \
227+ c = a % b; \
228+ } \
229+ *n /= b; \
230+ *d /= b; \
231 }
232
233+REDUCE_FRACTION( x264_reduce_fraction, uint32_t )
234+REDUCE_FRACTION( x264_reduce_fraction64, uint64_t )
235+
236 /****************************************************************************
237 * x264_slurp_file:
238 ****************************************************************************/
239diff --git a/common/common.h b/common/common.h
240index abb5db2..3d522eb 100644
241--- a/common/common.h
242+++ b/common/common.h
243@@ -183,6 +183,7 @@ char *x264_param2string( x264_param_t *p, int b_res );
244 void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
245
246 void x264_reduce_fraction( uint32_t *n, uint32_t *d );
247+void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
248 void x264_init_vlc_tables();
249
250 static ALWAYS_INLINE pixel x264_clip_pixel( int x )
251diff --git a/encoder/encoder.c b/encoder/encoder.c
252index 0c02d74..764be11 100644
253--- a/encoder/encoder.c
254+++ b/encoder/encoder.c
255@@ -2570,8 +2570,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
256 /* generate sei buffering period and insert it into place */
257 if( h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
258 {
259- h->initial_cpb_removal_delay = x264_hrd_fullness( h );
260-
261+ x264_hrd_fullness( h );
262 x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
263 x264_sei_buffering_period_write( h, &h->out.bs );
264 if( x264_nal_end( h ) )
265diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
266index 2c05ad7..aef5083 100644
267--- a/encoder/ratecontrol.c
268+++ b/encoder/ratecontrol.c
269@@ -91,7 +91,7 @@ struct x264_ratecontrol_t
270
271 /* VBV stuff */
272 double buffer_size;
273- double buffer_fill_final; /* real buffer as of the last finished frame */
274+ int64_t buffer_fill_final;
275 double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */
276 double buffer_rate; /* # of bits added to buffer_fill after each frame */
277 double vbv_max_rate; /* # of bits added to buffer_fill per second */
278@@ -157,6 +157,7 @@ struct x264_ratecontrol_t
279 int initial_cpb_removal_delay_offset;
280 double nrt_first_access_unit; /* nominal removal time */
281 double previous_cpb_final_arrival_time;
282+ uint64_t hrd_multiply_denom;
283 };
284
285
286@@ -463,6 +464,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
287 int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
288
289 /* Init HRD */
290+ h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
291+ h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
292 if( h->param.i_nal_hrd && b_init )
293 {
294 h->sps->vui.hrd.i_cpb_cnt = 1;
295@@ -499,8 +502,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
296
297 #undef MAX_DURATION
298
299- vbv_buffer_size = X264_MIN( vbv_buffer_size, h->sps->vui.hrd.i_cpb_size_unscaled );
300- vbv_max_bitrate = X264_MIN( vbv_max_bitrate, h->sps->vui.hrd.i_bit_rate_unscaled );
301+ vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled;
302+ vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
303 }
304 else if( h->param.i_nal_hrd && !b_init )
305 {
306@@ -528,7 +531,7 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
307 if( h->param.rc.f_vbv_buffer_init > 1. )
308 h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
309 h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1);
310- rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
311+ rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale;
312 rc->b_vbv = 1;
313 rc->b_vbv_min_rate = !rc->b_2pass
314 && h->param.rc.i_rc_method == X264_RC_ABR
315@@ -577,6 +580,23 @@ int x264_ratecontrol_new( x264_t *h )
316
317 x264_ratecontrol_init_reconfigurable( h, 1 );
318
319+ if( h->param.i_nal_hrd )
320+ {
321+ uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale;
322+ uint64_t num = 180000;
323+ x264_reduce_fraction64( &num, &denom );
324+ rc->hrd_multiply_denom = 180000 / num;
325+
326+ double bits_required = log2( 180000 / rc->hrd_multiply_denom )
327+ + log2( h->sps->vui.i_time_scale )
328+ + log2( h->sps->vui.hrd.i_cpb_size_unscaled );
329+ if( bits_required >= 63 )
330+ {
331+ x264_log( h, X264_LOG_ERROR, "HRD with very large timescale and bufsize not supported\n" );
332+ return -1;
333+ }
334+ }
335+
336 if( rc->rate_tolerance < 0.01 )
337 {
338 x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
339@@ -1722,9 +1742,10 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
340 static int update_vbv( x264_t *h, int bits )
341 {
342 int filler = 0;
343-
344+ int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
345 x264_ratecontrol_t *rcc = h->rc;
346 x264_ratecontrol_t *rct = h->thread[0]->rc;
347+ uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
348
349 if( rcc->last_satd >= h->mb.i_mb_count )
350 update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits );
351@@ -1732,48 +1753,48 @@ static int update_vbv( x264_t *h, int bits )
352 if( !rcc->b_vbv )
353 return filler;
354
355- rct->buffer_fill_final -= bits;
356+ rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
357
358 if( rct->buffer_fill_final < 0 )
359- x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
360+ x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
361 rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
362- rct->buffer_fill_final += rcc->buffer_rate;
363+ rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
364
365- if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > rcc->buffer_size )
366+ if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
367 {
368- filler = ceil( (rct->buffer_fill_final - rcc->buffer_size) / 8 );
369- rct->buffer_fill_final -= X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
370+ filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
371+ bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
372+ rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
373 }
374 else
375- rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
376+ rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size );
377
378 return filler;
379 }
380
381-int x264_hrd_fullness( x264_t *h )
382+void x264_hrd_fullness( x264_t *h )
383 {
384 x264_ratecontrol_t *rct = h->thread[0]->rc;
385- double cpb_bits = rct->buffer_fill_final;
386- double bps = h->sps->vui.hrd.i_bit_rate_unscaled;
387- double cpb_size = h->sps->vui.hrd.i_cpb_size_unscaled;
388- double cpb_fullness = 90000.0*cpb_bits/bps;
389+ uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom;
390+ uint64_t cpb_state = rct->buffer_fill_final;
391+ uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
392+ uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
393
394- if( cpb_bits < 0 || cpb_bits > cpb_size )
395+ if( cpb_state < 0 || cpb_state > cpb_size )
396 {
397 x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
398- cpb_bits < 0 ? "underflow" : "overflow", cpb_bits, cpb_size );
399+ cpb_state < 0 ? "underflow" : "overflow", (float)cpb_state/denom, (float)cpb_size/denom );
400 }
401
402- h->initial_cpb_removal_delay_offset = 90000.0*(cpb_size - cpb_bits)/bps;
403-
404- return x264_clip3f( cpb_fullness + 0.5, 0, 90000.0*cpb_size/bps ); // just lie if we are in a weird state
405+ h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom);
406+ h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay;
407 }
408
409 // provisionally update VBV according to the planned size of all frames currently in progress
410 static void update_vbv_plan( x264_t *h, int overhead )
411 {
412 x264_ratecontrol_t *rcc = h->rc;
413- rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
414+ rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final / h->sps->vui.i_time_scale;
415 if( h->i_thread_frames > 1 )
416 {
417 int j = h->rc - h->thread[0]->rc;
418diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
419index dd139eb..f39c070 100644
420--- a/encoder/ratecontrol.h
421+++ b/encoder/ratecontrol.h
422@@ -47,6 +47,6 @@ int x264_rc_analyse_slice( x264_t *h );
423 int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
424 void x264_threads_distribute_ratecontrol( x264_t *h );
425 void x264_threads_merge_ratecontrol( x264_t *h );
426-int x264_hrd_fullness( x264_t *h );
427+void x264_hrd_fullness( x264_t *h );
428 #endif
429
430--
4311.7.0.4
432
433
434From ee45e5c4f4b4e56e7424d3d025c2ab9766aa4cb1 Mon Sep 17 00:00:00 2001
435From: Jason Garrett-Glaser <darkshikari@gmail.com>
436Date: Fri, 18 Jun 2010 13:58:11 -0700
437Subject: [PATCH 06/10] SSE4 and SSSE3 versions of some intra_sad functions
438 Primarily Nehalem-optimized.
439
440---
441 common/pixel.c | 3 +
442 common/x86/pixel.h | 2 +
443 common/x86/sad-a.asm | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
444 3 files changed, 112 insertions(+), 0 deletions(-)
445
446diff --git a/common/pixel.c b/common/pixel.c
447index a8cb1df..8441c7a 100644
448--- a/common/pixel.c
449+++ b/common/pixel.c
450@@ -856,6 +856,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
451 }
452 pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
453 pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4;
454+ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
455+ /* Slower on Conroe, so only enable under SSE4 */
456+ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3;
457 }
458 #endif //HAVE_MMX
459
460diff --git a/common/x86/pixel.h b/common/x86/pixel.h
461index 9bba683..b1b916d 100644
462--- a/common/x86/pixel.h
463+++ b/common/x86/pixel.h
464@@ -80,6 +80,7 @@ DECL_PIXELS( uint64_t, hadamard_ac, sse4, ( uint8_t *pix, int i_stride ))
465 void x264_intra_satd_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
466 void x264_intra_satd_x3_4x4_ssse3 ( uint8_t *, uint8_t *, int * );
467 void x264_intra_sad_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
468+void x264_intra_sad_x3_4x4_sse4 ( uint8_t *, uint8_t *, int * );
469 void x264_intra_satd_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
470 void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * );
471 void x264_intra_sad_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
472@@ -93,6 +94,7 @@ void x264_intra_sa8d_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
473 void x264_intra_sa8d_x3_8x8_sse2 ( uint8_t *, uint8_t *, int * );
474 void x264_intra_sa8d_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
475 void x264_intra_sad_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
476+void x264_intra_sad_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
477 void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
478 void x264_intra_sa8d_x3_8x8_core_sse2 ( uint8_t *, int16_t [2][8], int * );
479 void x264_intra_sa8d_x3_8x8_core_ssse3 ( uint8_t *, int16_t [2][8], int * );
480diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
481index 72c1789..10a365c 100644
482--- a/common/x86/sad-a.asm
483+++ b/common/x86/sad-a.asm
484@@ -26,6 +26,19 @@
485 %include "x86inc.asm"
486 %include "x86util.asm"
487
488+SECTION_RODATA
489+
490+h4x4_pred_shuf: db 3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15
491+h4x4_pred_shuf2: db 3,7,11,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
492+h8x8_pred_shuf: times 8 db 1
493+ times 8 db 0
494+ times 8 db 3
495+ times 8 db 2
496+ times 8 db 5
497+ times 8 db 4
498+ times 8 db 7
499+ times 8 db 6
500+
501 SECTION .text
502
503 cextern pb_3
504@@ -303,6 +316,40 @@ cglobal intra_sad_x3_4x4_mmxext, 3,3
505 movd [r2+4], mm1 ;H prediction cost
506 RET
507
508+cglobal intra_sad_x3_4x4_sse4, 3,3
509+ movd xmm4, [r1+FDEC_STRIDE*0-4]
510+ pinsrd xmm4, [r1+FDEC_STRIDE*1-4], 1
511+ pinsrd xmm4, [r1+FDEC_STRIDE*2-4], 2
512+ pinsrd xmm4, [r1+FDEC_STRIDE*3-4], 3
513+ movd xmm2, [r1-FDEC_STRIDE]
514+ pxor xmm3, xmm3
515+ movdqa xmm5, xmm4
516+ pshufb xmm4, [h4x4_pred_shuf2] ; EFGH
517+ pshufb xmm5, [h4x4_pred_shuf] ; EEEEFFFFGGGGHHHH
518+ pshufd xmm0, xmm2, 0 ; ABCDABCDABCDABCD
519+ punpckldq xmm2, xmm4 ; ABCDEFGH
520+ psadbw xmm2, xmm3
521+ movd xmm1, [r0+FENC_STRIDE*0]
522+ pinsrd xmm1, [r0+FENC_STRIDE*1], 1
523+ pinsrd xmm1, [r0+FENC_STRIDE*2], 2
524+ pinsrd xmm1, [r0+FENC_STRIDE*3], 3
525+ psadbw xmm0, xmm1
526+ psadbw xmm5, xmm1
527+ psraw xmm2, 2
528+ pavgw xmm2, xmm3
529+ pshufb xmm2, xmm3 ; DC prediction
530+ movdqa xmm3, xmm0
531+ punpcklqdq xmm0, xmm5
532+ punpckhqdq xmm3, xmm5
533+ psadbw xmm2, xmm1
534+ paddw xmm0, xmm3
535+ movhlps xmm4, xmm2
536+ packusdw xmm0, xmm0
537+ paddw xmm2, xmm4
538+ movq [r2], xmm0 ; V/H prediction costs
539+ movd [r2+8], xmm2 ; DC prediction cost
540+ RET
541+
542 ;-----------------------------------------------------------------------------
543 ; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[33], int res[3]);
544 ;-----------------------------------------------------------------------------
545@@ -370,6 +417,66 @@ cglobal intra_sad_x3_8x8_mmxext, 3,3
546 movd [r2+8], m1
547 RET
548
549+INIT_XMM
550+cglobal intra_sad_x3_8x8_ssse3, 3,4,9
551+%ifdef PIC
552+ lea r11, [h8x8_pred_shuf]
553+%define shuf r11
554+%else
555+%define shuf h8x8_pred_shuf
556+%endif
557+ movq m0, [r1+7] ; left pixels
558+ movq m1, [r1+16] ; top pixels
559+ pxor m2, m2
560+ pxor m3, m3
561+ psadbw m2, m0
562+ psadbw m3, m1
563+ paddw m2, m3
564+ pxor m3, m3 ; V score accumulator
565+ psraw m2, 3
566+ pavgw m2, m3
567+ punpcklqdq m1, m1 ; V prediction
568+ pshufb m2, m3 ; DC prediction
569+ pxor m4, m4 ; H score accumulator
570+ pxor m5, m5 ; DC score accumulator
571+ mov r3d, 6
572+.loop:
573+ movq m6, [r0+FENC_STRIDE*0]
574+ movhps m6, [r0+FENC_STRIDE*1]
575+ movdqa m7, m0
576+ pshufb m7, [shuf+r3*8] ; H prediction
577+%ifdef ARCH_X86_64
578+ movdqa m8, m1
579+ psadbw m7, m6
580+ psadbw m8, m6
581+ psadbw m6, m2
582+ paddw m4, m7
583+ paddw m3, m8
584+ paddw m5, m6
585+%else
586+ psadbw m7, m6
587+ paddw m4, m7
588+ movdqa m7, m1
589+ psadbw m7, m6
590+ psadbw m6, m2
591+ paddw m3, m7
592+ paddw m5, m6
593+%endif
594+ add r0, FENC_STRIDE*2
595+ sub r3d, 2
596+ jge .loop
597+
598+ movhlps m0, m3
599+ movhlps m1, m4
600+ movhlps m2, m5
601+ paddw m3, m0
602+ paddw m4, m1
603+ paddw m5, m2
604+ movd [r2+0], m3
605+ movd [r2+4], m4
606+ movd [r2+8], m5
607+ RET
608+
609 ;-----------------------------------------------------------------------------
610 ; void intra_sad_x3_8x8c( uint8_t *fenc, uint8_t *fdec, int res[3] );
611 ;-----------------------------------------------------------------------------
612--
6131.7.0.4
614
615
616From e683053fb10af2ad29f6b52aad1467c6d42bbd94 Mon Sep 17 00:00:00 2001
617From: Jason Garrett-Glaser <darkshikari@gmail.com>
618Date: Sat, 19 Jun 2010 01:41:07 -0700
619Subject: [PATCH 07/10] Improve 2-pass bitrate prediction
620 Adapt based on distance to the end in bits, not in frames.
621 Helps in videos with absurdly simple end sections, e.g. black frames.
622
623---
624 encoder/ratecontrol.c | 12 +++++++++---
625 1 files changed, 9 insertions(+), 3 deletions(-)
626
627diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
628index aef5083..16afbf0 100644
629--- a/encoder/ratecontrol.c
630+++ b/encoder/ratecontrol.c
631@@ -2034,9 +2034,6 @@ static float rate_estimate_qscale( x264_t *h )
632 double lmax = rcc->lmax[pict_type];
633 int64_t diff;
634 int64_t predicted_bits = total_bits;
635- /* Adjust ABR buffer based on distance to the end of the video. */
636- if( rcc->num_entries > h->i_frame )
637- abr_buffer *= 0.5 * sqrt( rcc->num_entries - h->i_frame );
638
639 if( rcc->b_vbv )
640 {
641@@ -2062,6 +2059,15 @@ static float rate_estimate_qscale( x264_t *h )
642 predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
643 }
644
645+ /* Adjust ABR buffer based on distance to the end of the video. */
646+ if( rcc->num_entries > h->i_frame )
647+ {
648+ double final_bits = rcc->entry[rcc->num_entries-1].expected_bits;
649+ double video_pos = rce.expected_bits / final_bits;
650+ double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries );
651+ abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 );
652+ }
653+
654 diff = predicted_bits - (int64_t)rce.expected_bits;
655 q = rce.new_qscale;
656 q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
657--
6581.7.0.4
659
660
661From 78bda6a6cf8c038539c8a95eff7876597cd6e968 Mon Sep 17 00:00:00 2001
662From: Steven Walters <kemuri9@gmail.com>
663Date: Wed, 9 Jun 2010 18:14:52 -0400
664Subject: [PATCH 08/10] Use threadpools to avoid unnecessary thread creation
665 Tiny performance improvement with fast settings and lots of threads.
666 May help more on some OSs with slow thread creation, like OS X.
667 Unify inconsistent synchronized abbreviations to sync.
668
669---
670 Makefile | 3 +-
671 common/common.h | 10 ++-
672 common/frame.c | 19 +++++-
673 common/frame.h | 9 ++-
674 common/threadpool.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++
675 common/threadpool.h | 39 ++++++++++++
676 encoder/encoder.c | 79 ++++++++++++-------------
677 encoder/lookahead.c | 22 ++++----
678 input/thread.c | 17 ++---
679 9 files changed, 288 insertions(+), 73 deletions(-)
680 create mode 100644 common/threadpool.c
681 create mode 100644 common/threadpool.h
682
683diff --git a/Makefile b/Makefile
684index 8074ce5..9837821 100644
685--- a/Makefile
686+++ b/Makefile
687@@ -22,13 +22,14 @@ SRCSO =
688
689 CONFIG := $(shell cat config.h)
690
691-# Optional muxer module sources
692+# Optional module sources
693 ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
694 SRCCLI += input/avs.c
695 endif
696
697 ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
698 SRCCLI += input/thread.c
699+SRCS += common/threadpool.c
700 endif
701
702 ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
703diff --git a/common/common.h b/common/common.h
704index 3d522eb..60899fe 100644
705--- a/common/common.h
706+++ b/common/common.h
707@@ -160,6 +160,7 @@ static const int x264_scan8[16+2*4+3] =
708 #include "cabac.h"
709 #include "quant.h"
710 #include "cpu.h"
711+#include "threadpool.h"
712
713 /****************************************************************************
714 * General functions
715@@ -365,9 +366,10 @@ typedef struct x264_lookahead_t
716 int i_last_keyframe;
717 int i_slicetype_length;
718 x264_frame_t *last_nonb;
719- x264_synch_frame_list_t ifbuf;
720- x264_synch_frame_list_t next;
721- x264_synch_frame_list_t ofbuf;
722+ x264_pthread_t thread_handle;
723+ x264_sync_frame_list_t ifbuf;
724+ x264_sync_frame_list_t next;
725+ x264_sync_frame_list_t ofbuf;
726 } x264_lookahead_t;
727
728 typedef struct x264_ratecontrol_t x264_ratecontrol_t;
729@@ -378,11 +380,11 @@ struct x264_t
730 x264_param_t param;
731
732 x264_t *thread[X264_THREAD_MAX+1];
733- x264_pthread_t thread_handle;
734 int b_thread_active;
735 int i_thread_phase; /* which thread to use for the next frame */
736 int i_threadslice_start; /* first row in this thread slice */
737 int i_threadslice_end; /* row after the end of this thread slice */
738+ x264_threadpool_t *threadpool;
739
740 /* bitstream output */
741 struct
742diff --git a/common/frame.c b/common/frame.c
743index c5c573f..7c2fce0 100644
744--- a/common/frame.c
745+++ b/common/frame.c
746@@ -517,7 +517,7 @@ void x264_frame_delete_list( x264_frame_t **list )
747 x264_free( list );
748 }
749
750-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
751+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
752 {
753 if( max_size < 0 )
754 return -1;
755@@ -533,7 +533,7 @@ fail:
756 return -1;
757 }
758
759-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
760+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
761 {
762 x264_pthread_mutex_destroy( &slist->mutex );
763 x264_pthread_cond_destroy( &slist->cv_fill );
764@@ -541,7 +541,7 @@ void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
765 x264_frame_delete_list( slist->list );
766 }
767
768-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
769+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
770 {
771 x264_pthread_mutex_lock( &slist->mutex );
772 while( slist->i_size == slist->i_max_size )
773@@ -550,3 +550,16 @@ void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *f
774 x264_pthread_mutex_unlock( &slist->mutex );
775 x264_pthread_cond_broadcast( &slist->cv_fill );
776 }
777+
778+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist )
779+{
780+ x264_frame_t *frame;
781+ x264_pthread_mutex_lock( &slist->mutex );
782+ while( !slist->i_size )
783+ x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
784+ frame = slist->list[ --slist->i_size ];
785+ slist->list[ slist->i_size ] = NULL;
786+ x264_pthread_cond_broadcast( &slist->cv_empty );
787+ x264_pthread_mutex_unlock( &slist->mutex );
788+ return frame;
789+}
790diff --git a/common/frame.h b/common/frame.h
791index 7d252c3..26529ce 100644
792--- a/common/frame.h
793+++ b/common/frame.h
794@@ -154,7 +154,7 @@ typedef struct
795 x264_pthread_mutex_t mutex;
796 x264_pthread_cond_t cv_fill; /* event signaling that the list became fuller */
797 x264_pthread_cond_t cv_empty; /* event signaling that the list became emptier */
798-} x264_synch_frame_list_t;
799+} x264_sync_frame_list_t;
800
801 typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
802 typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
803@@ -202,9 +202,10 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
804 void x264_frame_sort( x264_frame_t **list, int b_dts );
805 void x264_frame_delete_list( x264_frame_t **list );
806
807-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
808-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
809-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
810+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
811+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
812+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
813+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
814
815 #define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
816 #define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
817diff --git a/common/threadpool.c b/common/threadpool.c
818new file mode 100644
819index 0000000..4448ea2
820--- /dev/null
821+++ b/common/threadpool.c
822@@ -0,0 +1,163 @@
823+/*****************************************************************************
824+ * threadpool.c: x264 threadpool module
825+ *****************************************************************************
826+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
827+ *
828+ * This program is free software; you can redistribute it and/or modify
829+ * it under the terms of the GNU General Public License as published by
830+ * the Free Software Foundation; either version 2 of the License, or
831+ * (at your option) any later version.
832+ *
833+ * This program is distributed in the hope that it will be useful,
834+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
835+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
836+ * GNU General Public License for more details.
837+ *
838+ * You should have received a copy of the GNU General Public License
839+ * along with this program; if not, write to the Free Software
840+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
841+ *****************************************************************************/
842+
843+#include "common.h"
844+
845+typedef struct
846+{
847+ void *(*func)(void *);
848+ void *arg;
849+ void *ret;
850+} x264_threadpool_job_t;
851+
852+struct x264_threadpool_t
853+{
854+ int exit;
855+ int threads;
856+ x264_pthread_t *thread_handle;
857+ void (*init_func)(void *);
858+ void *init_arg;
859+
860+ /* requires a synchronized list structure and associated methods,
861+ so use what is already implemented for frames */
862+ x264_sync_frame_list_t uninit; /* list of jobs that are awaiting use */
863+ x264_sync_frame_list_t run; /* list of jobs that are queued for processing by the pool */
864+ x264_sync_frame_list_t done; /* list of jobs that have finished processing */
865+};
866+
867+static void x264_threadpool_thread( x264_threadpool_t *pool )
868+{
869+ if( pool->init_func )
870+ pool->init_func( pool->init_arg );
871+
872+ while( !pool->exit )
873+ {
874+ x264_threadpool_job_t *job = NULL;
875+ x264_pthread_mutex_lock( &pool->run.mutex );
876+ while( !pool->exit && !pool->run.i_size )
877+ x264_pthread_cond_wait( &pool->run.cv_fill, &pool->run.mutex );
878+ if( pool->run.i_size )
879+ {
880+ job = (void*)x264_frame_shift( pool->run.list );
881+ pool->run.i_size--;
882+ }
883+ x264_pthread_mutex_unlock( &pool->run.mutex );
884+ if( !job )
885+ continue;
886+ job->ret = job->func( job->arg ); /* execute the function */
887+ x264_sync_frame_list_push( &pool->done, (void*)job );
888+ }
889+}
890+
891+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
892+ void (*init_func)(void *), void *init_arg )
893+{
894+ if( threads <= 0 )
895+ return -1;
896+
897+ x264_threadpool_t *pool;
898+ CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) );
899+ *p_pool = pool;
900+
901+ pool->init_func = init_func;
902+ pool->init_arg = init_arg;
903+ pool->threads = X264_MIN( threads, X264_THREAD_MAX );
904+
905+ CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
906+
907+ if( x264_sync_frame_list_init( &pool->uninit, pool->threads ) ||
908+ x264_sync_frame_list_init( &pool->run, pool->threads ) ||
909+ x264_sync_frame_list_init( &pool->done, pool->threads ) )
910+ goto fail;
911+
912+ for( int i = 0; i < pool->threads; i++ )
913+ {
914+ x264_threadpool_job_t *job;
915+ CHECKED_MALLOC( job, sizeof(x264_threadpool_job_t) );
916+ x264_sync_frame_list_push( &pool->uninit, (void*)job );
917+ }
918+ for( int i = 0; i < pool->threads; i++ )
919+ if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) )
920+ goto fail;
921+
922+ return 0;
923+fail:
924+ return -1;
925+}
926+
927+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg )
928+{
929+ x264_threadpool_job_t *job = (void*)x264_sync_frame_list_pop( &pool->uninit );
930+ job->func = func;
931+ job->arg = arg;
932+ x264_sync_frame_list_push( &pool->run, (void*)job );
933+}
934+
935+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg )
936+{
937+ x264_threadpool_job_t *job = NULL;
938+
939+ x264_pthread_mutex_lock( &pool->done.mutex );
940+ while( !job )
941+ {
942+ for( int i = 0; i < pool->done.i_size; i++ )
943+ {
944+ x264_threadpool_job_t *t = (void*)pool->done.list[i];
945+ if( t->arg == arg )
946+ {
947+ job = (void*)x264_frame_shift( pool->done.list+i );
948+ pool->done.i_size--;
949+ }
950+ }
951+ if( !job )
952+ x264_pthread_cond_wait( &pool->done.cv_fill, &pool->done.mutex );
953+ }
954+ x264_pthread_mutex_unlock( &pool->done.mutex );
955+
956+ void *ret = job->ret;
957+ x264_sync_frame_list_push( &pool->uninit, (void*)job );
958+ return ret;
959+}
960+
961+static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist )
962+{
963+ for( int i = 0; slist->list[i]; i++ )
964+ {
965+ x264_free( slist->list[i] );
966+ slist->list[i] = NULL;
967+ }
968+ x264_sync_frame_list_delete( slist );
969+}
970+
971+void x264_threadpool_delete( x264_threadpool_t *pool )
972+{
973+ x264_pthread_mutex_lock( &pool->run.mutex );
974+ pool->exit = 1;
975+ x264_pthread_cond_broadcast( &pool->run.cv_fill );
976+ x264_pthread_mutex_unlock( &pool->run.mutex );
977+ for( int i = 0; i < pool->threads; i++ )
978+ x264_pthread_join( pool->thread_handle[i], NULL );
979+
980+ x264_threadpool_list_delete( &pool->uninit );
981+ x264_threadpool_list_delete( &pool->run );
982+ x264_threadpool_list_delete( &pool->done );
983+ x264_free( pool->thread_handle );
984+ x264_free( pool );
985+}
986diff --git a/common/threadpool.h b/common/threadpool.h
987new file mode 100644
988index 0000000..519737c
989--- /dev/null
990+++ b/common/threadpool.h
991@@ -0,0 +1,39 @@
992+/*****************************************************************************
993+ * threadpool.h: x264 threadpool module
994+ *****************************************************************************
995+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
996+ *
997+ * This program is free software; you can redistribute it and/or modify
998+ * it under the terms of the GNU General Public License as published by
999+ * the Free Software Foundation; either version 2 of the License, or
1000+ * (at your option) any later version.
1001+ *
1002+ * This program is distributed in the hope that it will be useful,
1003+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
1004+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1005+ * GNU General Public License for more details.
1006+ *
1007+ * You should have received a copy of the GNU General Public License
1008+ * along with this program; if not, write to the Free Software
1009+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
1010+ *****************************************************************************/
1011+
1012+#ifndef X264_THREADPOOL_H
1013+#define X264_THREADPOOL_H
1014+
1015+typedef struct x264_threadpool_t x264_threadpool_t;
1016+
1017+#if HAVE_PTHREAD
1018+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
1019+ void (*init_func)(void *), void *init_arg );
1020+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg );
1021+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg );
1022+void x264_threadpool_delete( x264_threadpool_t *pool );
1023+#else
1024+#define x264_threadpool_init(p,t,f,a) -1
1025+#define x264_threadpool_run(p,f,a)
1026+#define x264_threadpool_wait(p,a) NULL
1027+#define x264_threadpool_delete(p)
1028+#endif
1029+
1030+#endif
1031diff --git a/encoder/encoder.c b/encoder/encoder.c
1032index 764be11..2014e3c 100644
1033--- a/encoder/encoder.c
1034+++ b/encoder/encoder.c
1035@@ -349,6 +349,20 @@ fail:
1036 return -1;
1037 }
1038
1039+#if HAVE_PTHREAD
1040+static void x264_encoder_thread_init( x264_t *h )
1041+{
1042+ if( h->param.i_sync_lookahead )
1043+ x264_lower_thread_priority( 10 );
1044+
1045+#if HAVE_MMX
1046+ /* Misalign mask has to be set separately for each thread. */
1047+ if( h->param.cpu&X264_CPU_SSE_MISALIGN )
1048+ x264_cpu_mask_misalign_sse();
1049+#endif
1050+}
1051+#endif
1052+
1053 /****************************************************************************
1054 *
1055 ****************************************************************************
1056@@ -1052,6 +1066,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
1057 CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
1058 h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
1059
1060+ if( h->param.i_threads > 1 &&
1061+ x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
1062+ goto fail;
1063+
1064 h->thread[0] = h;
1065 for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
1066 CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
1067@@ -2045,14 +2063,6 @@ static void *x264_slices_write( x264_t *h )
1068 {
1069 int i_slice_num = 0;
1070 int last_thread_mb = h->sh.i_last_mb;
1071- if( h->param.i_sync_lookahead )
1072- x264_lower_thread_priority( 10 );
1073-
1074-#if HAVE_MMX
1075- /* Misalign mask has to be set separately for each thread. */
1076- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
1077- x264_cpu_mask_misalign_sse();
1078-#endif
1079
1080 #if HAVE_VISUALIZE
1081 if( h->param.b_visualize )
1082@@ -2094,11 +2104,6 @@ static void *x264_slices_write( x264_t *h )
1083
1084 static int x264_threaded_slices_write( x264_t *h )
1085 {
1086- void *ret = NULL;
1087-#if HAVE_MMX
1088- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
1089- x264_cpu_mask_misalign_sse();
1090-#endif
1091 /* set first/last mb and sync contexts */
1092 for( int i = 0; i < h->param.i_threads; i++ )
1093 {
1094@@ -2122,16 +2127,14 @@ static int x264_threaded_slices_write( x264_t *h )
1095 /* dispatch */
1096 for( int i = 0; i < h->param.i_threads; i++ )
1097 {
1098- if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
1099- return -1;
1100+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
1101 h->thread[i]->b_thread_active = 1;
1102 }
1103 for( int i = 0; i < h->param.i_threads; i++ )
1104 {
1105- x264_pthread_join( h->thread[i]->thread_handle, &ret );
1106 h->thread[i]->b_thread_active = 0;
1107- if( (intptr_t)ret )
1108- return (intptr_t)ret;
1109+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
1110+ return -1;
1111 }
1112
1113 /* Go back and fix up the hpel on the borders between slices. */
1114@@ -2207,6 +2210,10 @@ int x264_encoder_encode( x264_t *h,
1115 thread_current =
1116 thread_oldest = h;
1117 }
1118+#if HAVE_MMX
1119+ if( h->i_thread_frames == 1 && h->param.cpu&X264_CPU_SSE_MISALIGN )
1120+ x264_cpu_mask_misalign_sse();
1121+#endif
1122
1123 // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
1124 if( x264_reference_update( h ) )
1125@@ -2530,8 +2537,7 @@ int x264_encoder_encode( x264_t *h,
1126 h->i_threadslice_end = h->mb.i_mb_height;
1127 if( h->i_thread_frames > 1 )
1128 {
1129- if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
1130- return -1;
1131+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
1132 h->b_thread_active = 1;
1133 }
1134 else if( h->param.b_sliced_threads )
1135@@ -2554,11 +2560,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
1136
1137 if( h->b_thread_active )
1138 {
1139- void *ret = NULL;
1140- x264_pthread_join( h->thread_handle, &ret );
1141 h->b_thread_active = 0;
1142- if( (intptr_t)ret )
1143- return (intptr_t)ret;
1144+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
1145+ return -1;
1146 }
1147 if( !h->out.i_nal )
1148 {
1149@@ -2822,25 +2826,20 @@ void x264_encoder_close ( x264_t *h )
1150 x264_lookahead_delete( h );
1151
1152 if( h->param.i_threads > 1 )
1153+ x264_threadpool_delete( h->threadpool );
1154+ if( h->i_thread_frames > 1 )
1155 {
1156- // don't strictly have to wait for the other threads, but it's simpler than canceling them
1157- for( int i = 0; i < h->param.i_threads; i++ )
1158+ for( int i = 0; i < h->i_thread_frames; i++ )
1159 if( h->thread[i]->b_thread_active )
1160- x264_pthread_join( h->thread[i]->thread_handle, NULL );
1161- if( h->i_thread_frames > 1 )
1162- {
1163- for( int i = 0; i < h->i_thread_frames; i++ )
1164- if( h->thread[i]->b_thread_active )
1165- {
1166- assert( h->thread[i]->fenc->i_reference_count == 1 );
1167- x264_frame_delete( h->thread[i]->fenc );
1168- }
1169+ {
1170+ assert( h->thread[i]->fenc->i_reference_count == 1 );
1171+ x264_frame_delete( h->thread[i]->fenc );
1172+ }
1173
1174- x264_t *thread_prev = h->thread[h->i_thread_phase];
1175- x264_thread_sync_ratecontrol( h, thread_prev, h );
1176- x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
1177- h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
1178- }
1179+ x264_t *thread_prev = h->thread[h->i_thread_phase];
1180+ x264_thread_sync_ratecontrol( h, thread_prev, h );
1181+ x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
1182+ h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
1183 }
1184 h->i_frame++;
1185
1186diff --git a/encoder/lookahead.c b/encoder/lookahead.c
1187index a79d4b1..f0af216 100644
1188--- a/encoder/lookahead.c
1189+++ b/encoder/lookahead.c
1190@@ -37,7 +37,7 @@
1191 #include "common/common.h"
1192 #include "analyse.h"
1193
1194-static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
1195+static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
1196 {
1197 int i = count;
1198 while( i-- )
1199@@ -137,9 +137,9 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
1200 look->i_slicetype_length = i_slicetype_length;
1201
1202 /* init frame lists */
1203- if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
1204- x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
1205- x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
1206+ if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
1207+ x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
1208+ x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
1209 goto fail;
1210
1211 if( !h->param.i_sync_lookahead )
1212@@ -153,7 +153,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
1213 if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
1214 goto fail;
1215
1216- if( x264_pthread_create( &look_h->thread_handle, NULL, (void *)x264_lookahead_thread, look_h ) )
1217+ if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
1218 goto fail;
1219 look->b_thread_active = 1;
1220
1221@@ -171,25 +171,25 @@ void x264_lookahead_delete( x264_t *h )
1222 h->lookahead->b_exit_thread = 1;
1223 x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
1224 x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
1225- x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
1226+ x264_pthread_join( h->lookahead->thread_handle, NULL );
1227 x264_macroblock_cache_free( h->thread[h->param.i_threads] );
1228 x264_macroblock_thread_free( h->thread[h->param.i_threads], 1 );
1229 x264_free( h->thread[h->param.i_threads] );
1230 }
1231- x264_synch_frame_list_delete( &h->lookahead->ifbuf );
1232- x264_synch_frame_list_delete( &h->lookahead->next );
1233+ x264_sync_frame_list_delete( &h->lookahead->ifbuf );
1234+ x264_sync_frame_list_delete( &h->lookahead->next );
1235 if( h->lookahead->last_nonb )
1236 x264_frame_push_unused( h, h->lookahead->last_nonb );
1237- x264_synch_frame_list_delete( &h->lookahead->ofbuf );
1238+ x264_sync_frame_list_delete( &h->lookahead->ofbuf );
1239 x264_free( h->lookahead );
1240 }
1241
1242 void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
1243 {
1244 if( h->param.i_sync_lookahead )
1245- x264_synch_frame_list_push( &h->lookahead->ifbuf, frame );
1246+ x264_sync_frame_list_push( &h->lookahead->ifbuf, frame );
1247 else
1248- x264_synch_frame_list_push( &h->lookahead->next, frame );
1249+ x264_sync_frame_list_push( &h->lookahead->next, frame );
1250 }
1251
1252 int x264_lookahead_is_empty( x264_t *h )
1253diff --git a/input/thread.c b/input/thread.c
1254index a88cfae..c4b07fa 100644
1255--- a/input/thread.c
1256+++ b/input/thread.c
1257@@ -30,10 +30,9 @@ typedef struct
1258 cli_input_t input;
1259 hnd_t p_handle;
1260 x264_picture_t pic;
1261- x264_pthread_t tid;
1262+ x264_threadpool_t *pool;
1263 int next_frame;
1264 int frame_total;
1265- int in_progress;
1266 struct thread_input_arg_t *next_args;
1267 } thread_hnd_t;
1268
1269@@ -55,7 +54,6 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1270 }
1271 h->input = input;
1272 h->p_handle = *p_handle;
1273- h->in_progress = 0;
1274 h->next_frame = -1;
1275 h->next_args = malloc( sizeof(thread_input_arg_t) );
1276 if( !h->next_args )
1277@@ -66,6 +64,9 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1278 thread_input.picture_alloc = h->input.picture_alloc;
1279 thread_input.picture_clean = h->input.picture_clean;
1280
1281+ if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
1282+ return -1;
1283+
1284 *p_handle = h;
1285 return 0;
1286 }
1287@@ -88,9 +89,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1288
1289 if( h->next_frame >= 0 )
1290 {
1291- x264_pthread_join( h->tid, NULL );
1292+ x264_threadpool_wait( h->pool, h->next_args );
1293 ret |= h->next_args->status;
1294- h->in_progress = 0;
1295 }
1296
1297 if( h->next_frame == i_frame )
1298@@ -103,9 +103,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1299 h->next_frame =
1300 h->next_args->i_frame = i_frame+1;
1301 h->next_args->pic = &h->pic;
1302- if( x264_pthread_create( &h->tid, NULL, (void*)read_frame_thread_int, h->next_args ) )
1303- return -1;
1304- h->in_progress = 1;
1305+ x264_threadpool_run( h->pool, (void*)read_frame_thread_int, h->next_args );
1306 }
1307 else
1308 h->next_frame = -1;
1309@@ -124,8 +122,7 @@ static int release_frame( x264_picture_t *pic, hnd_t handle )
1310 static int close_file( hnd_t handle )
1311 {
1312 thread_hnd_t *h = handle;
1313- if( h->in_progress )
1314- x264_pthread_join( h->tid, NULL );
1315+ x264_threadpool_delete( h->pool );
1316 h->input.close_file( h->p_handle );
1317 h->input.picture_clean( &h->pic );
1318 free( h->next_args );
1319--
13201.7.0.4
1321
1322
1323From 577fc11ad64a6dfe32c26e08b78a9f491c0dff5e Mon Sep 17 00:00:00 2001
1324From: Lamont Alston <wewk584@gmail.com>
1325Date: Wed, 16 Jun 2010 10:05:17 -0700
1326Subject: [PATCH 09/10] Add open-GOP support
1327
1328---
1329 common/common.c | 13 +++++++++-
1330 common/common.h | 6 ++++-
1331 encoder/encoder.c | 45 +++++++++++++++++++++++++-------------
1332 encoder/ratecontrol.c | 1 +
1333 encoder/slicetype.c | 57 +++++++++++++++++++++++++++++++++++++------------
1334 x264.c | 11 ++++++++-
1335 x264.h | 8 ++++++-
1336 7 files changed, 107 insertions(+), 34 deletions(-)
1337
1338diff --git a/common/common.c b/common/common.c
1339index cb1aa9c..d61d82a 100644
1340--- a/common/common.c
1341+++ b/common/common.c
1342@@ -676,6 +676,15 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
1343 p->i_bframe_pyramid = atoi(value);
1344 }
1345 }
1346+ OPT("open-gop")
1347+ {
1348+ b_error |= parse_enum( value, x264_open_gop_names, &p->i_open_gop );
1349+ if( b_error )
1350+ {
1351+ b_error = 0;
1352+ p->i_open_gop = atoi(value);
1353+ }
1354+ }
1355 OPT("nf")
1356 p->b_deblocking_filter = !atobool(value);
1357 OPT2("filter", "deblock")
1358@@ -1190,9 +1199,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
1359 s += sprintf( s, " bframes=%d", p->i_bframe );
1360 if( p->i_bframe )
1361 {
1362- s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d",
1363+ s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
1364 p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
1365- p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
1366+ p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->i_open_gop );
1367 }
1368 s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
1369
1370diff --git a/common/common.h b/common/common.h
1371index 60899fe..dfa1121 100644
1372--- a/common/common.h
1373+++ b/common/common.h
1374@@ -471,7 +471,11 @@ struct x264_t
1375 /* frames used for reference + sentinels */
1376 x264_frame_t *reference[16+2];
1377
1378- int i_last_keyframe; /* Frame number of the last keyframe */
1379+ int i_last_keyframe; /* Frame number of the last keyframe */
1380+ int i_last_idr; /* Frame number of the last IDR (not a recovery point) */
1381+ int i_poc_last_open_gop; /* Poc of the I frame of the last open-gop. The value
1382+ * is only assigned during the period between that
1383+ * I frame and the next P or I frame, else -1 */
1384
1385 int i_input; /* Number of input frames already accepted */
1386
1387diff --git a/encoder/encoder.c b/encoder/encoder.c
1388index 2014e3c..9ccda81 100644
1389--- a/encoder/encoder.c
1390+++ b/encoder/encoder.c
1391@@ -573,12 +573,10 @@ static int x264_validate_parameters( x264_t *h )
1392 x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
1393 h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
1394 }
1395- h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
1396+ h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
1397+ h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER );
1398 if( h->param.i_keyint_max == 1 )
1399- {
1400- h->param.i_bframe = 0;
1401 h->param.b_intra_refresh = 0;
1402- }
1403 h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
1404 if( h->param.i_bframe <= 1 )
1405 h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
1406@@ -588,6 +586,7 @@ static int x264_validate_parameters( x264_t *h )
1407 h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
1408 h->param.analyse.i_direct_mv_pred = 0;
1409 h->param.analyse.b_weighted_bipred = 0;
1410+ h->param.i_open_gop = X264_OPEN_GOP_NONE;
1411 }
1412 if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL )
1413 {
1414@@ -599,6 +598,11 @@ static int x264_validate_parameters( x264_t *h )
1415 x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
1416 h->param.i_frame_reference = 1;
1417 }
1418+ if( h->param.b_intra_refresh && h->param.i_open_gop )
1419+ {
1420+ x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" );
1421+ h->param.i_open_gop = X264_OPEN_GOP_NONE;
1422+ }
1423 if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO )
1424 h->param.i_keyint_min = h->param.i_keyint_max / 10;
1425 h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
1426@@ -978,9 +982,11 @@ x264_t *x264_encoder_open( x264_param_t *param )
1427 h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
1428 h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
1429
1430+ h->frames.i_last_idr =
1431 h->frames.i_last_keyframe = - h->param.i_keyint_max;
1432 h->frames.i_input = 0;
1433 h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
1434+ h->frames.i_poc_last_open_gop = -1;
1435
1436 CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
1437 /* Allocate room for max refs plus a few extra just in case. */
1438@@ -1689,35 +1695,37 @@ static inline void x264_reference_hierarchy_reset( x264_t *h )
1439 {
1440 int ref;
1441 int b_hasdelayframe = 0;
1442- if( !h->param.i_bframe_pyramid )
1443- return;
1444
1445 /* look for delay frames -- chain must only contain frames that are disposable */
1446 for( int i = 0; h->frames.current[i] && IS_DISPOSABLE( h->frames.current[i]->i_type ); i++ )
1447 b_hasdelayframe |= h->frames.current[i]->i_coded
1448 != h->frames.current[i]->i_frame + h->sps->vui.i_num_reorder_frames;
1449
1450- if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe )
1451+ /* This function must handle b-pyramid and clear frames for open-gop */
1452+ if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe && h->frames.i_poc_last_open_gop == -1 )
1453 return;
1454
1455 /* Remove last BREF. There will never be old BREFs in the
1456 * dpb during a BREF decode when pyramid == STRICT */
1457 for( ref = 0; h->frames.reference[ref]; ref++ )
1458 {
1459- if( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
1460+ if( ( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
1461 && h->frames.reference[ref]->i_type == X264_TYPE_BREF )
1462+ || ( h->frames.reference[ref]->i_poc < h->frames.i_poc_last_open_gop
1463+ && h->sh.i_type != SLICE_TYPE_B ) )
1464 {
1465 int diff = h->i_frame_num - h->frames.reference[ref]->i_frame_num;
1466 h->sh.mmco[h->sh.i_mmco_command_count].i_difference_of_pic_nums = diff;
1467 h->sh.mmco[h->sh.i_mmco_command_count++].i_poc = h->frames.reference[ref]->i_poc;
1468- x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) );
1469+ x264_frame_push_unused( h, x264_frame_shift( &h->frames.reference[ref] ) );
1470 h->b_ref_reorder[0] = 1;
1471- break;
1472+ ref--;
1473 }
1474 }
1475
1476- /* Prepare to room in the dpb for the delayed display time of the later b-frame's */
1477- h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
1478+ /* Prepare room in the dpb for the delayed display time of the later b-frames */
1479+ if( h->param.i_bframe_pyramid )
1480+ h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
1481 }
1482
1483 static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
1484@@ -2322,12 +2330,17 @@ int x264_encoder_encode( x264_t *h,
1485 {
1486 h->frames.i_last_keyframe = h->fenc->i_frame;
1487 if( h->fenc->i_type == X264_TYPE_IDR )
1488+ {
1489 h->i_frame_num = 0;
1490+ h->frames.i_last_idr = h->fenc->i_frame;
1491+ }
1492 }
1493 h->sh.i_mmco_command_count =
1494 h->sh.i_mmco_remove_from_end = 0;
1495 h->b_ref_reorder[0] =
1496 h->b_ref_reorder[1] = 0;
1497+ h->fdec->i_poc =
1498+ h->fenc->i_poc = 2 * ( h->fenc->i_frame - X264_MAX( h->frames.i_last_idr, 0 ) );
1499
1500 /* ------------------- Setup frame context ----------------------------- */
1501 /* 5: Init data dependent of frame type */
1502@@ -2338,6 +2351,7 @@ int x264_encoder_encode( x264_t *h,
1503 i_nal_ref_idc = NAL_PRIORITY_HIGHEST;
1504 h->sh.i_type = SLICE_TYPE_I;
1505 x264_reference_reset( h );
1506+ h->frames.i_poc_last_open_gop = -1;
1507 }
1508 else if( h->fenc->i_type == X264_TYPE_I )
1509 {
1510@@ -2345,6 +2359,8 @@ int x264_encoder_encode( x264_t *h,
1511 i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
1512 h->sh.i_type = SLICE_TYPE_I;
1513 x264_reference_hierarchy_reset( h );
1514+ if( h->param.i_open_gop )
1515+ h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
1516 }
1517 else if( h->fenc->i_type == X264_TYPE_P )
1518 {
1519@@ -2352,6 +2368,7 @@ int x264_encoder_encode( x264_t *h,
1520 i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
1521 h->sh.i_type = SLICE_TYPE_P;
1522 x264_reference_hierarchy_reset( h );
1523+ h->frames.i_poc_last_open_gop = -1;
1524 }
1525 else if( h->fenc->i_type == X264_TYPE_BREF )
1526 {
1527@@ -2367,8 +2384,6 @@ int x264_encoder_encode( x264_t *h,
1528 h->sh.i_type = SLICE_TYPE_B;
1529 }
1530
1531- h->fdec->i_poc =
1532- h->fenc->i_poc = 2 * (h->fenc->i_frame - h->frames.i_last_keyframe);
1533 h->fdec->i_type = h->fenc->i_type;
1534 h->fdec->i_frame = h->fenc->i_frame;
1535 h->fenc->b_kept_as_ref =
1536@@ -2485,7 +2500,7 @@ int x264_encoder_encode( x264_t *h,
1537
1538 if( h->fenc->i_type != X264_TYPE_IDR )
1539 {
1540- int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
1541+ int time_to_recovery = h->param.i_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
1542 x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
1543 x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
1544 x264_nal_end( h );
1545diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
1546index 16afbf0..1030ef2 100644
1547--- a/encoder/ratecontrol.c
1548+++ b/encoder/ratecontrol.c
1549@@ -724,6 +724,7 @@ int x264_ratecontrol_new( x264_t *h )
1550 CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
1551 CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
1552 CMP_OPT_FIRST_PASS( "keyint", h->param.i_keyint_max );
1553+ CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop );
1554
1555 if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
1556 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
1557diff --git a/encoder/slicetype.c b/encoder/slicetype.c
1558index 60f3a24..0a2514d 100644
1559--- a/encoder/slicetype.c
1560+++ b/encoder/slicetype.c
1561@@ -1058,7 +1058,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1562 {
1563 x264_mb_analysis_t a;
1564 x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
1565- int num_frames, orig_num_frames, keyint_limit, idr_frame_type, framecnt;
1566+ int num_frames, orig_num_frames, keyint_limit, framecnt;
1567 int i_mb_count = NUM_MBS;
1568 int cost1p0, cost2p0, cost1b1, cost2p1;
1569 int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
1570@@ -1080,7 +1080,6 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1571 orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
1572
1573 x264_lowres_context_init( h, &a );
1574- idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
1575
1576 /* This is important psy-wise: if we have a non-scenecut keyframe,
1577 * there will be significant visual artifacts if the frames just before
1578@@ -1092,12 +1091,12 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1579 {
1580 frames[1]->i_type = X264_TYPE_P;
1581 if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
1582- frames[1]->i_type = idr_frame_type;
1583+ frames[1]->i_type = X264_TYPE_I;
1584 return;
1585 }
1586 else if( num_frames == 0 )
1587 {
1588- frames[1]->i_type = idr_frame_type;
1589+ frames[1]->i_type = X264_TYPE_I;
1590 return;
1591 }
1592
1593@@ -1106,7 +1105,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1594 int reset_start;
1595 if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
1596 {
1597- frames[1]->i_type = idr_frame_type;
1598+ frames[1]->i_type = X264_TYPE_I;
1599 return;
1600 }
1601
1602@@ -1210,15 +1209,19 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1603
1604 /* Enforce keyframe limit. */
1605 if( !h->param.b_intra_refresh )
1606- for( int j = 0; j < num_frames; j++ )
1607+ for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
1608 {
1609- if( ((j-keyint_limit) % h->param.i_keyint_max) == 0 )
1610+ int j = i;
1611+ if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
1612 {
1613- if( j && h->param.i_keyint_max > 1 )
1614- frames[j]->i_type = X264_TYPE_P;
1615- frames[j+1]->i_type = X264_TYPE_IDR;
1616- reset_start = X264_MIN( reset_start, j+2 );
1617+ while( IS_X264_TYPE_B( frames[i]->i_type ) )
1618+ i++;
1619+ while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
1620+ j--;
1621 }
1622+ frames[i]->i_type = X264_TYPE_I;
1623+ reset_start = X264_MIN( reset_start, i+1 );
1624+ i = j;
1625 }
1626
1627 if( h->param.rc.i_vbv_buffer_size )
1628@@ -1303,13 +1306,39 @@ void x264_slicetype_decide( x264_t *h )
1629 frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference );
1630 }
1631
1632+ if( frm->i_type == X264_TYPE_KEYFRAME )
1633+ frm->i_type = h->param.i_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
1634+
1635 /* Limit GOP size */
1636 if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
1637 {
1638- if( frm->i_type == X264_TYPE_AUTO )
1639+ if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
1640+ frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
1641+ int warn = frm->i_type != X264_TYPE_IDR;
1642+ if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER )
1643+ warn &= frm->i_type != X264_TYPE_I;
1644+ if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
1645+ {
1646+ /* if this minigop ends with i, it's not a violation */
1647+ int j = bframes;
1648+ while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
1649+ j++;
1650+ warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
1651+ }
1652+ if( warn )
1653+ x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
1654+ }
1655+ if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min )
1656+ {
1657+ if( h->param.i_open_gop )
1658+ {
1659+ h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
1660+ if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
1661+ h->lookahead->i_last_keyframe -= bframes; // Use coded order
1662+ frm->b_keyframe = 1;
1663+ }
1664+ else
1665 frm->i_type = X264_TYPE_IDR;
1666- if( frm->i_type != X264_TYPE_IDR )
1667- x264_log( h, X264_LOG_WARNING, "specified frame type (%d) is not compatible with keyframe interval\n", frm->i_type );
1668 }
1669 if( frm->i_type == X264_TYPE_IDR )
1670 {
1671diff --git a/x264.c b/x264.c
1672index f1b55d4..14282ff 100644
1673--- a/x264.c
1674+++ b/x264.c
1675@@ -380,6 +380,12 @@ static void Help( x264_param_t *defaults, int longhelp )
1676 " - strict: Strictly hierarchical pyramid\n"
1677 " - normal: Non-strict (not Blu-ray compatible)\n",
1678 strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
1679+ H1( " --open-gop <string> Use recovery points to close GOPs [none]\n"
1680+ " - none: Use standard closed GOPs\n"
1681+ " - display: Base GOP length on display order\n"
1682+ " - coded: Base GOP length on coded order\n"
1683+ " (Required for Blu-Ray)\n"
1684+ " Only available with b-frames\n" );
1685 H1( " --no-cabac Disable CABAC\n" );
1686 H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
1687 H1( " --no-deblock Disable loop filter\n" );
1688@@ -441,7 +447,8 @@ static void Help( x264_param_t *defaults, int longhelp )
1689 " or b=<float> (bitrate multiplier)\n" );
1690 H2( " --qpfile <string> Force frametypes and QPs for some or all frames\n"
1691 " Format of each line: framenumber frametype QP\n"
1692- " QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n"
1693+ " QP of -1 lets x264 choose. Frametypes: I,i,K,P,B,b.\n"
1694+ " K=<I or i> depending on open-gop setting\n"
1695 " QPs are restricted by qpmin/qpmax.\n" );
1696 H1( "\n" );
1697 H1( "Analysis:\n" );
1698@@ -627,6 +634,7 @@ static struct option long_options[] =
1699 { "no-b-adapt", no_argument, NULL, 0 },
1700 { "b-bias", required_argument, NULL, 0 },
1701 { "b-pyramid", required_argument, NULL, 0 },
1702+ { "open-gop", required_argument, NULL, 0 },
1703 { "min-keyint", required_argument, NULL, 'i' },
1704 { "keyint", required_argument, NULL, 'I' },
1705 { "intra-refresh", no_argument, NULL, 0 },
1706@@ -1305,6 +1313,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
1707 pic->i_qpplus1 = qp+1;
1708 if ( type == 'I' ) pic->i_type = X264_TYPE_IDR;
1709 else if( type == 'i' ) pic->i_type = X264_TYPE_I;
1710+ else if( type == 'K' ) pic->i_type = X264_TYPE_KEYFRAME;
1711 else if( type == 'P' ) pic->i_type = X264_TYPE_P;
1712 else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
1713 else if( type == 'b' ) pic->i_type = X264_TYPE_B;
1714diff --git a/x264.h b/x264.h
1715index 9cd4600..09183fd 100644
1716--- a/x264.h
1717+++ b/x264.h
1718@@ -35,7 +35,7 @@
1719
1720 #include <stdarg.h>
1721
1722-#define X264_BUILD 98
1723+#define X264_BUILD 99
1724
1725 /* x264_t:
1726 * opaque handler for encoder */
1727@@ -104,6 +104,9 @@ typedef struct x264_t x264_t;
1728 #define X264_B_PYRAMID_STRICT 1
1729 #define X264_B_PYRAMID_NORMAL 2
1730 #define X264_KEYINT_MIN_AUTO 0
1731+#define X264_OPEN_GOP_NONE 0
1732+#define X264_OPEN_GOP_DISPLAY_ORDER 1
1733+#define X264_OPEN_GOP_CODED_ORDER 2
1734
1735 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
1736 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
1737@@ -115,6 +118,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "
1738 static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
1739 static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
1740 static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
1741+static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 };
1742
1743 /* Colorspace type
1744 * legacy only; nothing other than I420 is really supported. */
1745@@ -138,6 +142,7 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
1746 #define X264_TYPE_P 0x0003
1747 #define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */
1748 #define X264_TYPE_B 0x0005
1749+#define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on i_open_gop option */
1750 #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR)
1751 #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
1752
1753@@ -221,6 +226,7 @@ typedef struct x264_param_t
1754 int i_bframe_adaptive;
1755 int i_bframe_bias;
1756 int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
1757+ int i_open_gop; /* Open gop: 1=display order, 2=coded order to determine gop size */
1758
1759 int b_deblocking_filter;
1760 int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
1761--
17621.7.0.4
1763
1764
1765From b4013fbaa70db3301a14c7bf141ec1d89665e608 Mon Sep 17 00:00:00 2001
1766From: Jason Garrett-Glaser <darkshikari@gmail.com>
1767Date: Thu, 17 Jun 2010 14:50:07 -0700
1768Subject: [PATCH 10/10] Lookaheadless MB-tree support
1769 Uses past motion information instead of future data from the lookahead.
1770 Not as accurate, but better than nothing in zero-latency compression when a lookahead isn't available.
1771 Currently resets on keyframes, so only available if intra-refresh is set, to avoid pops on non-scenecut keyframes.
1772
1773Also slightly modify encoding presets: disable rc-lookahead in the fastest presets.
1774Enable MB-tree in "veryfast", albeit with a very short lookahead.
1775---
1776 common/common.c | 5 ++++-
1777 encoder/encoder.c | 7 ++++++-
1778 encoder/slicetype.c | 48 ++++++++++++++++++++++++++++++++++--------------
1779 x264.c | 14 +++++++-------
1780 4 files changed, 51 insertions(+), 23 deletions(-)
1781
1782diff --git a/common/common.c b/common/common.c
1783index d61d82a..5095ce8 100644
1784--- a/common/common.c
1785+++ b/common/common.c
1786@@ -184,6 +184,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1787 param->rc.b_mb_tree = 0;
1788 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1789 param->analyse.b_weighted_bipred = 0;
1790+ param->rc.i_lookahead = 0;
1791 }
1792 else if( !strcasecmp( preset, "superfast" ) )
1793 {
1794@@ -195,6 +196,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1795 param->analyse.i_trellis = 0;
1796 param->rc.b_mb_tree = 0;
1797 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1798+ param->rc.i_lookahead = 0;
1799 }
1800 else if( !strcasecmp( preset, "veryfast" ) )
1801 {
1802@@ -203,8 +205,8 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1803 param->i_frame_reference = 1;
1804 param->analyse.b_mixed_references = 0;
1805 param->analyse.i_trellis = 0;
1806- param->rc.b_mb_tree = 0;
1807 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1808+ param->rc.i_lookahead = 10;
1809 }
1810 else if( !strcasecmp( preset, "faster" ) )
1811 {
1812@@ -355,6 +357,7 @@ static int x264_param_apply_tune( x264_param_t *param, const char *tune )
1813 param->i_bframe = 0;
1814 param->b_sliced_threads = 1;
1815 param->b_vfr_input = 0;
1816+ param->rc.b_mb_tree = 0;
1817 }
1818 else if( !strncasecmp( s, "touhou", 6 ) )
1819 {
1820diff --git a/encoder/encoder.c b/encoder/encoder.c
1821index 9ccda81..82874dd 100644
1822--- a/encoder/encoder.c
1823+++ b/encoder/encoder.c
1824@@ -621,8 +621,13 @@ static int x264_validate_parameters( x264_t *h )
1825 }
1826
1827 h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
1828- if( !h->param.rc.i_lookahead || h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
1829+ if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
1830 h->param.rc.b_mb_tree = 0;
1831+ if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
1832+ {
1833+ x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
1834+ h->param.rc.b_mb_tree = 0;
1835+ }
1836 if( h->param.rc.b_stat_read )
1837 h->param.rc.i_lookahead = 0;
1838 #if HAVE_PTHREAD
1839diff --git a/encoder/slicetype.c b/encoder/slicetype.c
1840index 0a2514d..674b298 100644
1841--- a/encoder/slicetype.c
1842+++ b/encoder/slicetype.c
1843@@ -734,7 +734,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
1844 }
1845 }
1846
1847- if( h->param.rc.i_vbv_buffer_size && referenced )
1848+ if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
1849 x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
1850 }
1851
1852@@ -743,7 +743,8 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1853 int idx = !b_intra;
1854 int last_nonb, cur_nonb = 1;
1855 int bframes = 0;
1856- int i = num_frames - 1;
1857+ int i = num_frames;
1858+
1859 if( b_intra )
1860 x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
1861
1862@@ -751,10 +752,25 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1863 i--;
1864 last_nonb = i;
1865
1866- if( last_nonb < idx )
1867- return;
1868
1869- memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1870+ if( !h->param.rc.i_lookahead )
1871+ {
1872+ if( b_intra )
1873+ {
1874+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1875+ memcpy( frames[0]->f_qp_offset, frames[0]->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
1876+ return;
1877+ }
1878+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
1879+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1880+ }
1881+ else
1882+ {
1883+ if( last_nonb < idx )
1884+ return;
1885+ memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1886+ }
1887+
1888 while( i-- > idx )
1889 {
1890 cur_nonb = i;
1891@@ -796,6 +812,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1892 last_nonb = cur_nonb;
1893 }
1894
1895+ if( !h->param.rc.i_lookahead )
1896+ {
1897+ x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
1898+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
1899+ }
1900+
1901 x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
1902 if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
1903 x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
1904@@ -1062,6 +1084,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1905 int i_mb_count = NUM_MBS;
1906 int cost1p0, cost2p0, cost1b1, cost2p1;
1907 int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
1908+ int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
1909 if( h->param.b_deterministic )
1910 i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + !keyframe );
1911
1912@@ -1074,7 +1097,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1913 frames[framecnt+1] = h->lookahead->next.list[framecnt];
1914
1915 if( !framecnt )
1916+ {
1917+ if( h->param.rc.b_mb_tree )
1918+ x264_macroblock_tree( h, &a, frames, 0, keyframe );
1919 return;
1920+ }
1921
1922 keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_keyframe - 1;
1923 orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
1924@@ -1085,15 +1112,8 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1925 * there will be significant visual artifacts if the frames just before
1926 * go down in quality due to being referenced less, despite it being
1927 * more RD-optimal. */
1928- if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || h->param.rc.i_vbv_buffer_size )
1929+ if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
1930 num_frames = framecnt;
1931- else if( num_frames == 1 )
1932- {
1933- frames[1]->i_type = X264_TYPE_P;
1934- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
1935- frames[1]->i_type = X264_TYPE_I;
1936- return;
1937- }
1938 else if( num_frames == 0 )
1939 {
1940 frames[1]->i_type = X264_TYPE_I;
1941@@ -1224,7 +1244,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1942 i = j;
1943 }
1944
1945- if( h->param.rc.i_vbv_buffer_size )
1946+ if( vbv_lookahead )
1947 x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
1948
1949 /* Restore frametypes for all frames that haven't actually been decided yet. */
1950diff --git a/x264.c b/x264.c
1951index 14282ff..34305b6 100644
1952--- a/x264.c
1953+++ b/x264.c
1954@@ -285,16 +285,16 @@ static void Help( x264_param_t *defaults, int longhelp )
1955 " --no-8x8dct --aq-mode 0 --b-adapt 0\n"
1956 " --bframes 0 --no-cabac --no-deblock\n"
1957 " --no-mbtree --me dia --no-mixed-refs\n"
1958- " --partitions none --ref 1 --scenecut 0\n"
1959- " --subme 0 --trellis 0 --no-weightb\n"
1960- " --weightp 0\n"
1961+ " --partitions none --rc-lookahead 0 --ref 1\n"
1962+ " --scenecut 0 --subme 0 --trellis 0\n"
1963+ " --no-weightb --weightp 0\n"
1964 " - superfast:\n"
1965 " --no-mbtree --me dia --no-mixed-refs\n"
1966- " --partitions i8x8,i4x4 --ref 1\n"
1967- " --subme 1 --trellis 0 --weightp 0\n"
1968+ " --partitions i8x8,i4x4 --rc-lookahead 0\n"
1969+ " --ref 1 --subme 1 --trellis 0 --weightp 0\n"
1970 " - veryfast:\n"
1971- " --no-mbtree --no-mixed-refs --ref 1\n"
1972- " --subme 2 --trellis 0 --weightp 0\n"
1973+ " --no-mixed-refs --rc-lookahead 10\n"
1974+ " --ref 1 --subme 2 --trellis 0 --weightp 0\n"
1975 " - faster:\n"
1976 " --no-mixed-refs --rc-lookahead 20\n"
1977 " --ref 2 --subme 4 --weightp 1\n"
1978--
19791.7.0.4