· 8 years ago · Jun 01, 2017, 07:38 PM
1From 4fa7d539c1a09a0779194c7971495e966ff7de1e Mon Sep 17 00:00:00 2001
2From: Jason Garrett-Glaser <darkshikari@gmail.com>
3Date: Tue, 15 Jun 2010 05:15:42 -0700
4Subject: [PATCH 01/10] Fix compilation on ARM w/ Apple ABI
5
6---
7 encoder/me.c | 2 +-
8 1 files changed, 1 insertions(+), 1 deletions(-)
9
10diff --git a/encoder/me.c b/encoder/me.c
11index 2914eb3..291104a 100644
12--- a/encoder/me.c
13+++ b/encoder/me.c
14@@ -245,7 +245,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
15 pmv = pack16to32_mask( bmx, bmy );
16 if( i_mvc > 0 )
17 {
18- ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
19+ ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
20 x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
21 bcost <<= 4;
22 for( int i = 1; i <= i_mvc; i++ )
23--
241.7.0.4
25
26
27From 69e9d85c292cb9daa96664657352bf6c65af5825 Mon Sep 17 00:00:00 2001
28From: Anton Mitrofanov <BugMaster@narod.ru>
29Date: Sat, 19 Jun 2010 01:44:56 +0400
30Subject: [PATCH 02/10] Fix SIGPIPEs caused by is_regular_file checks
31 Check to see if input file is a pipe without opening it.
32
33---
34 common/osdep.h | 10 +++++++++-
35 x264.c | 1 +
36 2 files changed, 10 insertions(+), 1 deletions(-)
37
38diff --git a/common/osdep.h b/common/osdep.h
39index b1b357c..b3a8cd6 100644
40--- a/common/osdep.h
41+++ b/common/osdep.h
42@@ -290,7 +290,15 @@ static inline uint8_t x264_is_regular_file( FILE *filehandle )
43 {
44 struct stat file_stat;
45 if( fstat( fileno( filehandle ), &file_stat ) )
46- return 0;
47+ return 1; /* fstat failed: answer nonzero ("regular"); a -1 here would just wrap to 255 in the uint8_t return type */
48+ return S_ISREG( file_stat.st_mode );
49+}
50+
51+static inline uint8_t x264_is_regular_file_path( const char *filename )
52+{
53+ struct stat file_stat;
54+ if( stat( filename, &file_stat ) )
55+ return 1; /* stat failed: same nonzero answer as x264_is_regular_file */
56 return S_ISREG( file_stat.st_mode );
57 }
58
59diff --git a/x264.c b/x264.c
60index a124083..09bad61 100644
61--- a/x264.c
62+++ b/x264.c
63@@ -806,6 +806,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
64 int b_auto = !strcasecmp( demuxer, "auto" );
65 if( !b_regular && b_auto )
66 ext = "yuv";
67+ b_regular = b_regular && x264_is_regular_file_path( filename );
68 if( b_regular )
69 {
70 FILE *f = fopen( filename, "r" );
71--
721.7.0.4
73
74
75From 4c27afb595ac8e8a621ffc2bf8120f0d43c80384 Mon Sep 17 00:00:00 2001
76From: Jason Garrett-Glaser <darkshikari@gmail.com>
77Date: Tue, 22 Jun 2010 14:20:46 -0700
78Subject: [PATCH 03/10] Use -fno-tree-vectorize to avoid miscompilation
79 Some versions of gcc have been reported to attempt (and fail) to vectorize a loop in plane_expand_border.
80 This results in a segfault, so to limit the possible effects of gcc's utter incompetence, we're turning off vectorization entirely.
81 It's not like it ever did anything useful to begin with.
82
83---
84 configure | 4 ++++
85 1 files changed, 4 insertions(+), 0 deletions(-)
86
87diff --git a/configure b/configure
88index 3a38cb5..24d15ad 100755
89--- a/configure
90+++ b/configure
91@@ -628,6 +628,10 @@ else
92 CFLAGS="-O3 -ffast-math $CFLAGS"
93 fi
94
95+if cc_check '' -fno-tree-vectorize ; then
96+ CFLAGS="$CFLAGS -fno-tree-vectorize"
97+fi
98+
99 if cc_check "stdio.h" "" "fseeko(stdin,0,0);" ; then
100 define fseek fseeko
101 define ftell ftello
102--
1031.7.0.4
104
105
106From c875dd5214f47b254861c15b126d3342624dcdf0 Mon Sep 17 00:00:00 2001
107From: Jason Garrett-Glaser <darkshikari@gmail.com>
108Date: Sat, 19 Jun 2010 03:27:33 -0700
109Subject: [PATCH 04/10] Improve HRD accuracy
110 In a staggering display of brain damage, the spec requires all HRD math to be done in infinite precision despite the output being of quite limited precision.
111 Accordingly, convert buffer management to work in units of timescale.
112 These accumulating rounding errors probably didn't cause any real problems, but might in theory cause issues in very picky muxers on extremely long-running streams.
113
114---
115 common/common.c | 36 ++++++++++++++-----------
116 common/common.h | 1 +
117 encoder/encoder.c | 3 +-
118 encoder/ratecontrol.c | 67 ++++++++++++++++++++++++++++++++-----------------
119 encoder/ratecontrol.h | 2 +-
120 5 files changed, 67 insertions(+), 42 deletions(-)
121
122diff --git a/common/common.c b/common/common.c
123index 4fa5e4b..cb1aa9c 100644
124--- a/common/common.c
125+++ b/common/common.c
126@@ -1080,24 +1080,28 @@ void x264_free( void *p )
127 /****************************************************************************
128 * x264_reduce_fraction:
129 ****************************************************************************/
130-void x264_reduce_fraction( uint32_t *n, uint32_t *d )
131-{
132- uint32_t a = *n;
133- uint32_t b = *d;
134- uint32_t c;
135- if( !a || !b )
136- return;
137- c = a % b;
138- while(c)
139- {
140- a = b;
141- b = c;
142- c = a % b;
143- }
144- *n /= b;
145- *d /= b;
146+#define REDUCE_FRACTION( name, type )\
147+void name( type *n, type *d )\
148+{ \
149+ type a = *n; \
150+ type b = *d; \
151+ type c; \
152+ if( !a || !b ) \
153+ return; \
154+ c = a % b; \
155+ while( c ) \
156+ { \
157+ a = b; \
158+ b = c; \
159+ c = a % b; \
160+ } \
161+ *n /= b; \
162+ *d /= b; \
163 }
164
165+REDUCE_FRACTION( x264_reduce_fraction, uint32_t )
166+REDUCE_FRACTION( x264_reduce_fraction64, uint64_t )
167+
168 /****************************************************************************
169 * x264_slurp_file:
170 ****************************************************************************/
171diff --git a/common/common.h b/common/common.h
172index abb5db2..3d522eb 100644
173--- a/common/common.h
174+++ b/common/common.h
175@@ -183,6 +183,7 @@ char *x264_param2string( x264_param_t *p, int b_res );
176 void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
177
178 void x264_reduce_fraction( uint32_t *n, uint32_t *d );
179+void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
180 void x264_init_vlc_tables();
181
182 static ALWAYS_INLINE pixel x264_clip_pixel( int x )
183diff --git a/encoder/encoder.c b/encoder/encoder.c
184index 08a28bd..aebb536 100644
185--- a/encoder/encoder.c
186+++ b/encoder/encoder.c
187@@ -2569,8 +2569,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
188 /* generate sei buffering period and insert it into place */
189 if( h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
190 {
191- h->initial_cpb_removal_delay = x264_hrd_fullness( h );
192-
193+ x264_hrd_fullness( h );
194 x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
195 x264_sei_buffering_period_write( h, &h->out.bs );
196 if( x264_nal_end( h ) )
197diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
198index 2c05ad7..aef5083 100644
199--- a/encoder/ratecontrol.c
200+++ b/encoder/ratecontrol.c
201@@ -91,7 +91,7 @@ struct x264_ratecontrol_t
202
203 /* VBV stuff */
204 double buffer_size;
205- double buffer_fill_final; /* real buffer as of the last finished frame */
206+ int64_t buffer_fill_final;
207 double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */
208 double buffer_rate; /* # of bits added to buffer_fill after each frame */
209 double vbv_max_rate; /* # of bits added to buffer_fill per second */
210@@ -157,6 +157,7 @@ struct x264_ratecontrol_t
211 int initial_cpb_removal_delay_offset;
212 double nrt_first_access_unit; /* nominal removal time */
213 double previous_cpb_final_arrival_time;
214+ uint64_t hrd_multiply_denom;
215 };
216
217
218@@ -463,6 +464,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
219 int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
220
221 /* Init HRD */
222+ h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
223+ h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
224 if( h->param.i_nal_hrd && b_init )
225 {
226 h->sps->vui.hrd.i_cpb_cnt = 1;
227@@ -499,8 +502,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
228
229 #undef MAX_DURATION
230
231- vbv_buffer_size = X264_MIN( vbv_buffer_size, h->sps->vui.hrd.i_cpb_size_unscaled );
232- vbv_max_bitrate = X264_MIN( vbv_max_bitrate, h->sps->vui.hrd.i_bit_rate_unscaled );
233+ vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled;
234+ vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
235 }
236 else if( h->param.i_nal_hrd && !b_init )
237 {
238@@ -528,7 +531,7 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
239 if( h->param.rc.f_vbv_buffer_init > 1. )
240 h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
241 h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1);
242- rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
243+ rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale;
244 rc->b_vbv = 1;
245 rc->b_vbv_min_rate = !rc->b_2pass
246 && h->param.rc.i_rc_method == X264_RC_ABR
247@@ -577,6 +580,23 @@ int x264_ratecontrol_new( x264_t *h )
248
249 x264_ratecontrol_init_reconfigurable( h, 1 );
250
251+ if( h->param.i_nal_hrd )
252+ {
253+ uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale;
254+ uint64_t num = 180000;
255+ x264_reduce_fraction64( &num, &denom );
256+ rc->hrd_multiply_denom = 180000 / num;
257+
258+ double bits_required = log2( 180000 / rc->hrd_multiply_denom )
259+ + log2( h->sps->vui.i_time_scale )
260+ + log2( h->sps->vui.hrd.i_cpb_size_unscaled );
261+ if( bits_required >= 63 )
262+ {
263+ x264_log( h, X264_LOG_ERROR, "HRD with very large timescale and bufsize not supported\n" );
264+ return -1;
265+ }
266+ }
267+
268 if( rc->rate_tolerance < 0.01 )
269 {
270 x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
271@@ -1722,9 +1742,10 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
272 static int update_vbv( x264_t *h, int bits )
273 {
274 int filler = 0;
275-
276+ int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
277 x264_ratecontrol_t *rcc = h->rc;
278 x264_ratecontrol_t *rct = h->thread[0]->rc;
279+ uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
280
281 if( rcc->last_satd >= h->mb.i_mb_count )
282 update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits );
283@@ -1732,48 +1753,48 @@ static int update_vbv( x264_t *h, int bits )
284 if( !rcc->b_vbv )
285 return filler;
286
287- rct->buffer_fill_final -= bits;
288+ rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
289
290 if( rct->buffer_fill_final < 0 )
291- x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
292+ x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
293 rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
294- rct->buffer_fill_final += rcc->buffer_rate;
295+ rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
296
297- if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > rcc->buffer_size )
298+ if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
299 {
300- filler = ceil( (rct->buffer_fill_final - rcc->buffer_size) / 8 );
301- rct->buffer_fill_final -= X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
302+ filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
303+ bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
304+ rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
305 }
306 else
307- rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
308+ rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size );
309
310 return filler;
311 }
312
313-int x264_hrd_fullness( x264_t *h )
314+void x264_hrd_fullness( x264_t *h )
315 {
316 x264_ratecontrol_t *rct = h->thread[0]->rc;
317- double cpb_bits = rct->buffer_fill_final;
318- double bps = h->sps->vui.hrd.i_bit_rate_unscaled;
319- double cpb_size = h->sps->vui.hrd.i_cpb_size_unscaled;
320- double cpb_fullness = 90000.0*cpb_bits/bps;
321+ uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom;
322+ uint64_t cpb_state = rct->buffer_fill_final; /* bits * time_scale, unsigned copy for the delay math below */
323+ uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
324+ uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
325
326- if( cpb_bits < 0 || cpb_bits > cpb_size )
327+ if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > (int64_t)cpb_size ) /* test the signed field: the uint64_t copy can never be < 0 */
328 {
329 x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
330- cpb_bits < 0 ? "underflow" : "overflow", cpb_bits, cpb_size );
331+ rct->buffer_fill_final < 0 ? "underflow" : "overflow", (double)rct->buffer_fill_final / h->sps->vui.i_time_scale, (double)cpb_size / h->sps->vui.i_time_scale ); /* both values are bits * time_scale, so divide by time_scale to print bits */
332 }
333
334- h->initial_cpb_removal_delay_offset = 90000.0*(cpb_size - cpb_bits)/bps;
335-
336- return x264_clip3f( cpb_fullness + 0.5, 0, 90000.0*cpb_size/bps ); // just lie if we are in a weird state
337+ h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom);
338+ h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay;
339 }
340
341 // provisionally update VBV according to the planned size of all frames currently in progress
342 static void update_vbv_plan( x264_t *h, int overhead )
343 {
344 x264_ratecontrol_t *rcc = h->rc;
345- rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
346+ rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final / h->sps->vui.i_time_scale;
347 if( h->i_thread_frames > 1 )
348 {
349 int j = h->rc - h->thread[0]->rc;
350diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
351index dd139eb..f39c070 100644
352--- a/encoder/ratecontrol.h
353+++ b/encoder/ratecontrol.h
354@@ -47,6 +47,6 @@ int x264_rc_analyse_slice( x264_t *h );
355 int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
356 void x264_threads_distribute_ratecontrol( x264_t *h );
357 void x264_threads_merge_ratecontrol( x264_t *h );
358-int x264_hrd_fullness( x264_t *h );
359+void x264_hrd_fullness( x264_t *h );
360 #endif
361
362--
3631.7.0.4
364
365
366From 503278343bb695fadfba7b693c8441ea45b2a5bb Mon Sep 17 00:00:00 2001
367From: Jason Garrett-Glaser <darkshikari@gmail.com>
368Date: Fri, 18 Jun 2010 13:58:11 -0700
369Subject: [PATCH 05/10] SSE4 and SSSE3 versions of some intra_sad functions
370 Primarily Nehalem-optimized.
371
372---
373 common/pixel.c | 3 +
374 common/x86/pixel.h | 2 +
375 common/x86/sad-a.asm | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
376 3 files changed, 112 insertions(+), 0 deletions(-)
377
378diff --git a/common/pixel.c b/common/pixel.c
379index a8cb1df..8441c7a 100644
380--- a/common/pixel.c
381+++ b/common/pixel.c
382@@ -856,6 +856,9 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
383 }
384 pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
385 pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4;
386+ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
387+ /* Slower on Conroe, so only enable under SSE4 */
388+ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3;
389 }
390 #endif //HAVE_MMX
391
392diff --git a/common/x86/pixel.h b/common/x86/pixel.h
393index 9bba683..b1b916d 100644
394--- a/common/x86/pixel.h
395+++ b/common/x86/pixel.h
396@@ -80,6 +80,7 @@ DECL_PIXELS( uint64_t, hadamard_ac, sse4, ( uint8_t *pix, int i_stride ))
397 void x264_intra_satd_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
398 void x264_intra_satd_x3_4x4_ssse3 ( uint8_t *, uint8_t *, int * );
399 void x264_intra_sad_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
400+void x264_intra_sad_x3_4x4_sse4 ( uint8_t *, uint8_t *, int * );
401 void x264_intra_satd_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
402 void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * );
403 void x264_intra_sad_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
404@@ -93,6 +94,7 @@ void x264_intra_sa8d_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
405 void x264_intra_sa8d_x3_8x8_sse2 ( uint8_t *, uint8_t *, int * );
406 void x264_intra_sa8d_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
407 void x264_intra_sad_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
408+void x264_intra_sad_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
409 void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
410 void x264_intra_sa8d_x3_8x8_core_sse2 ( uint8_t *, int16_t [2][8], int * );
411 void x264_intra_sa8d_x3_8x8_core_ssse3 ( uint8_t *, int16_t [2][8], int * );
412diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
413index 72c1789..10a365c 100644
414--- a/common/x86/sad-a.asm
415+++ b/common/x86/sad-a.asm
416@@ -26,6 +26,19 @@
417 %include "x86inc.asm"
418 %include "x86util.asm"
419
420+SECTION_RODATA
421+
422+h4x4_pred_shuf: db 3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15
423+h4x4_pred_shuf2: db 3,7,11,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
424+h8x8_pred_shuf: times 8 db 1
425+ times 8 db 0
426+ times 8 db 3
427+ times 8 db 2
428+ times 8 db 5
429+ times 8 db 4
430+ times 8 db 7
431+ times 8 db 6
432+
433 SECTION .text
434
435 cextern pb_3
436@@ -303,6 +316,40 @@ cglobal intra_sad_x3_4x4_mmxext, 3,3
437 movd [r2+4], mm1 ;H prediction cost
438 RET
439
440+cglobal intra_sad_x3_4x4_sse4, 3,3
441+ movd xmm4, [r1+FDEC_STRIDE*0-4]
442+ pinsrd xmm4, [r1+FDEC_STRIDE*1-4], 1
443+ pinsrd xmm4, [r1+FDEC_STRIDE*2-4], 2
444+ pinsrd xmm4, [r1+FDEC_STRIDE*3-4], 3
445+ movd xmm2, [r1-FDEC_STRIDE]
446+ pxor xmm3, xmm3
447+ movdqa xmm5, xmm4
448+ pshufb xmm4, [h4x4_pred_shuf2] ; EFGH
449+ pshufb xmm5, [h4x4_pred_shuf] ; EEEEFFFFGGGGHHHH
450+ pshufd xmm0, xmm2, 0 ; ABCDABCDABCDABCD
451+ punpckldq xmm2, xmm4 ; ABCDEFGH
452+ psadbw xmm2, xmm3
453+ movd xmm1, [r0+FENC_STRIDE*0]
454+ pinsrd xmm1, [r0+FENC_STRIDE*1], 1
455+ pinsrd xmm1, [r0+FENC_STRIDE*2], 2
456+ pinsrd xmm1, [r0+FENC_STRIDE*3], 3
457+ psadbw xmm0, xmm1
458+ psadbw xmm5, xmm1
459+ psraw xmm2, 2
460+ pavgw xmm2, xmm3
461+ pshufb xmm2, xmm3 ; DC prediction
462+ movdqa xmm3, xmm0
463+ punpcklqdq xmm0, xmm5
464+ punpckhqdq xmm3, xmm5
465+ psadbw xmm2, xmm1
466+ paddw xmm0, xmm3
467+ movhlps xmm4, xmm2
468+ packusdw xmm0, xmm0
469+ paddw xmm2, xmm4
470+ movq [r2], xmm0 ; V/H prediction costs
471+ movd [r2+8], xmm2 ; DC prediction cost
472+ RET
473+
474 ;-----------------------------------------------------------------------------
475 ; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[33], int res[3]);
476 ;-----------------------------------------------------------------------------
477@@ -370,6 +417,66 @@ cglobal intra_sad_x3_8x8_mmxext, 3,3
478 movd [r2+8], m1
479 RET
480
481+INIT_XMM
482+cglobal intra_sad_x3_8x8_ssse3, 3,4,9
483+%ifdef PIC
484+ lea r11, [h8x8_pred_shuf]
485+%define shuf r11
486+%else
487+%define shuf h8x8_pred_shuf
488+%endif
489+ movq m0, [r1+7] ; left pixels
490+ movq m1, [r1+16] ; top pixels
491+ pxor m2, m2
492+ pxor m3, m3
493+ psadbw m2, m0
494+ psadbw m3, m1
495+ paddw m2, m3
496+ pxor m3, m3 ; V score accumulator
497+ psraw m2, 3
498+ pavgw m2, m3
499+ punpcklqdq m1, m1 ; V prediction
500+ pshufb m2, m3 ; DC prediction
501+ pxor m4, m4 ; H score accumulator
502+ pxor m5, m5 ; DC score accumulator
503+ mov r3d, 6
504+.loop:
505+ movq m6, [r0+FENC_STRIDE*0]
506+ movhps m6, [r0+FENC_STRIDE*1]
507+ movdqa m7, m0
508+ pshufb m7, [shuf+r3*8] ; H prediction
509+%ifdef ARCH_X86_64
510+ movdqa m8, m1
511+ psadbw m7, m6
512+ psadbw m8, m6
513+ psadbw m6, m2
514+ paddw m4, m7
515+ paddw m3, m8
516+ paddw m5, m6
517+%else
518+ psadbw m7, m6
519+ paddw m4, m7
520+ movdqa m7, m1
521+ psadbw m7, m6
522+ psadbw m6, m2
523+ paddw m3, m7
524+ paddw m5, m6
525+%endif
526+ add r0, FENC_STRIDE*2
527+ sub r3d, 2
528+ jge .loop
529+
530+ movhlps m0, m3
531+ movhlps m1, m4
532+ movhlps m2, m5
533+ paddw m3, m0
534+ paddw m4, m1
535+ paddw m5, m2
536+ movd [r2+0], m3
537+ movd [r2+4], m4
538+ movd [r2+8], m5
539+ RET
540+
541 ;-----------------------------------------------------------------------------
542 ; void intra_sad_x3_8x8c( uint8_t *fenc, uint8_t *fdec, int res[3] );
543 ;-----------------------------------------------------------------------------
544--
5451.7.0.4
546
547
548From 3596688385b229f3dee51e41f75ed671089f6f40 Mon Sep 17 00:00:00 2001
549From: Jason Garrett-Glaser <darkshikari@gmail.com>
550Date: Sat, 19 Jun 2010 01:41:07 -0700
551Subject: [PATCH 06/10] Improve 2-pass bitrate prediction
552 Adapt based on distance to the end in bits, not in frames.
553 Helps in videos with absurdly simple end sections, e.g. black frames.
554
555---
556 encoder/ratecontrol.c | 12 +++++++++---
557 1 files changed, 9 insertions(+), 3 deletions(-)
558
559diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
560index aef5083..16afbf0 100644
561--- a/encoder/ratecontrol.c
562+++ b/encoder/ratecontrol.c
563@@ -2034,9 +2034,6 @@ static float rate_estimate_qscale( x264_t *h )
564 double lmax = rcc->lmax[pict_type];
565 int64_t diff;
566 int64_t predicted_bits = total_bits;
567- /* Adjust ABR buffer based on distance to the end of the video. */
568- if( rcc->num_entries > h->i_frame )
569- abr_buffer *= 0.5 * sqrt( rcc->num_entries - h->i_frame );
570
571 if( rcc->b_vbv )
572 {
573@@ -2062,6 +2059,15 @@ static float rate_estimate_qscale( x264_t *h )
574 predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
575 }
576
577+ /* Adjust ABR buffer based on distance to the end of the video. */
578+ if( rcc->num_entries > h->i_frame )
579+ {
580+ double final_bits = rcc->entry[rcc->num_entries-1].expected_bits;
581+ double video_pos = rce.expected_bits / final_bits;
582+ double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries );
583+ abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 );
584+ }
585+
586 diff = predicted_bits - (int64_t)rce.expected_bits;
587 q = rce.new_qscale;
588 q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
589--
5901.7.0.4
591
592
593From 6f572c68b3b17ad3b425e52ded433a7360ffaac3 Mon Sep 17 00:00:00 2001
594From: Steven Walters <kemuri9@gmail.com>
595Date: Wed, 9 Jun 2010 18:14:52 -0400
596Subject: [PATCH 07/10] Use threadpools to avoid unnecessary thread creation
597 Tiny performance improvement with fast settings and lots of threads.
598 May help more on some OSs with slow thread creation, like OS X.
599 Unify inconsistent synchronized abbreviations to sync.
600
601---
602 Makefile | 3 +-
603 common/common.h | 10 ++-
604 common/frame.c | 19 +++++-
605 common/frame.h | 9 ++-
606 common/threadpool.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++
607 common/threadpool.h | 39 ++++++++++++
608 encoder/encoder.c | 79 ++++++++++++-------------
609 encoder/lookahead.c | 22 ++++----
610 input/thread.c | 17 ++---
611 9 files changed, 288 insertions(+), 73 deletions(-)
612 create mode 100644 common/threadpool.c
613 create mode 100644 common/threadpool.h
614
615diff --git a/Makefile b/Makefile
616index 8074ce5..9837821 100644
617--- a/Makefile
618+++ b/Makefile
619@@ -22,13 +22,14 @@ SRCSO =
620
621 CONFIG := $(shell cat config.h)
622
623-# Optional muxer module sources
624+# Optional module sources
625 ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
626 SRCCLI += input/avs.c
627 endif
628
629 ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
630 SRCCLI += input/thread.c
631+SRCS += common/threadpool.c
632 endif
633
634 ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
635diff --git a/common/common.h b/common/common.h
636index 3d522eb..60899fe 100644
637--- a/common/common.h
638+++ b/common/common.h
639@@ -160,6 +160,7 @@ static const int x264_scan8[16+2*4+3] =
640 #include "cabac.h"
641 #include "quant.h"
642 #include "cpu.h"
643+#include "threadpool.h"
644
645 /****************************************************************************
646 * General functions
647@@ -365,9 +366,10 @@ typedef struct x264_lookahead_t
648 int i_last_keyframe;
649 int i_slicetype_length;
650 x264_frame_t *last_nonb;
651- x264_synch_frame_list_t ifbuf;
652- x264_synch_frame_list_t next;
653- x264_synch_frame_list_t ofbuf;
654+ x264_pthread_t thread_handle;
655+ x264_sync_frame_list_t ifbuf;
656+ x264_sync_frame_list_t next;
657+ x264_sync_frame_list_t ofbuf;
658 } x264_lookahead_t;
659
660 typedef struct x264_ratecontrol_t x264_ratecontrol_t;
661@@ -378,11 +380,11 @@ struct x264_t
662 x264_param_t param;
663
664 x264_t *thread[X264_THREAD_MAX+1];
665- x264_pthread_t thread_handle;
666 int b_thread_active;
667 int i_thread_phase; /* which thread to use for the next frame */
668 int i_threadslice_start; /* first row in this thread slice */
669 int i_threadslice_end; /* row after the end of this thread slice */
670+ x264_threadpool_t *threadpool;
671
672 /* bitstream output */
673 struct
674diff --git a/common/frame.c b/common/frame.c
675index c5c573f..7c2fce0 100644
676--- a/common/frame.c
677+++ b/common/frame.c
678@@ -517,7 +517,7 @@ void x264_frame_delete_list( x264_frame_t **list )
679 x264_free( list );
680 }
681
682-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
683+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
684 {
685 if( max_size < 0 )
686 return -1;
687@@ -533,7 +533,7 @@ fail:
688 return -1;
689 }
690
691-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
692+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
693 {
694 x264_pthread_mutex_destroy( &slist->mutex );
695 x264_pthread_cond_destroy( &slist->cv_fill );
696@@ -541,7 +541,7 @@ void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
697 x264_frame_delete_list( slist->list );
698 }
699
700-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
701+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
702 {
703 x264_pthread_mutex_lock( &slist->mutex );
704 while( slist->i_size == slist->i_max_size )
705@@ -550,3 +550,16 @@ void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *f
706 x264_pthread_mutex_unlock( &slist->mutex );
707 x264_pthread_cond_broadcast( &slist->cv_fill );
708 }
709+
710+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist )
711+{
712+ x264_frame_t *frame;
713+ x264_pthread_mutex_lock( &slist->mutex );
714+ while( !slist->i_size )
715+ x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
716+ frame = slist->list[ --slist->i_size ];
717+ slist->list[ slist->i_size ] = NULL;
718+ x264_pthread_cond_broadcast( &slist->cv_empty );
719+ x264_pthread_mutex_unlock( &slist->mutex );
720+ return frame;
721+}
722diff --git a/common/frame.h b/common/frame.h
723index 7d252c3..26529ce 100644
724--- a/common/frame.h
725+++ b/common/frame.h
726@@ -154,7 +154,7 @@ typedef struct
727 x264_pthread_mutex_t mutex;
728 x264_pthread_cond_t cv_fill; /* event signaling that the list became fuller */
729 x264_pthread_cond_t cv_empty; /* event signaling that the list became emptier */
730-} x264_synch_frame_list_t;
731+} x264_sync_frame_list_t;
732
733 typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
734 typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
735@@ -202,9 +202,10 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
736 void x264_frame_sort( x264_frame_t **list, int b_dts );
737 void x264_frame_delete_list( x264_frame_t **list );
738
739-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
740-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
741-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
742+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
743+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
744+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
745+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
746
747 #define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
748 #define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
749diff --git a/common/threadpool.c b/common/threadpool.c
750new file mode 100644
751index 0000000..4448ea2
752--- /dev/null
753+++ b/common/threadpool.c
754@@ -0,0 +1,163 @@
755+/*****************************************************************************
756+ * threadpool.c: x264 threadpool module
757+ *****************************************************************************
758+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
759+ *
760+ * This program is free software; you can redistribute it and/or modify
761+ * it under the terms of the GNU General Public License as published by
762+ * the Free Software Foundation; either version 2 of the License, or
763+ * (at your option) any later version.
764+ *
765+ * This program is distributed in the hope that it will be useful,
766+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
767+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
768+ * GNU General Public License for more details.
769+ *
770+ * You should have received a copy of the GNU General Public License
771+ * along with this program; if not, write to the Free Software
772+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
773+ *****************************************************************************/
774+
775+#include "common.h"
776+
777+typedef struct
778+{
779+ void *(*func)(void *);
780+ void *arg;
781+ void *ret;
782+} x264_threadpool_job_t;
783+
784+struct x264_threadpool_t
785+{
786+ int exit;
787+ int threads;
788+ x264_pthread_t *thread_handle;
789+ void (*init_func)(void *);
790+ void *init_arg;
791+
792+ /* requires a synchronized list structure and associated methods,
793+ so use what is already implemented for frames */
794+ x264_sync_frame_list_t uninit; /* list of jobs that are awaiting use */
795+ x264_sync_frame_list_t run; /* list of jobs that are queued for processing by the pool */
796+ x264_sync_frame_list_t done; /* list of jobs that have finished processing */
797+};
798+
799+static void x264_threadpool_thread( x264_threadpool_t *pool )
800+{
801+ if( pool->init_func )
802+ pool->init_func( pool->init_arg );
803+
804+ while( !pool->exit )
805+ {
806+ x264_threadpool_job_t *job = NULL;
807+ x264_pthread_mutex_lock( &pool->run.mutex );
808+ while( !pool->exit && !pool->run.i_size )
809+ x264_pthread_cond_wait( &pool->run.cv_fill, &pool->run.mutex );
810+ if( pool->run.i_size )
811+ {
812+ job = (void*)x264_frame_shift( pool->run.list );
813+ pool->run.i_size--;
814+ }
815+ x264_pthread_mutex_unlock( &pool->run.mutex );
816+ if( !job )
817+ continue;
818+ job->ret = job->func( job->arg ); /* execute the function */
819+ x264_sync_frame_list_push( &pool->done, (void*)job );
820+ }
821+}
822+
823+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
824+ void (*init_func)(void *), void *init_arg ) /* create a pool of worker threads; returns 0 on success, -1 on failure */
825+{
826+ if( threads <= 0 )
827+ return -1;
828+
829+ x264_threadpool_t *pool;
830+ CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) );
831+ *p_pool = pool; /* stored before setup completes, so it stays set if a later step fails */
832+
833+ pool->init_func = init_func; /* each worker calls init_func( init_arg ) once before it processes jobs */
834+ pool->init_arg = init_arg;
835+ pool->threads = X264_MIN( threads, X264_THREAD_MAX ); /* worker count is capped at X264_THREAD_MAX */
836+
837+ CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
838+
839+ if( x264_sync_frame_list_init( &pool->uninit, pool->threads ) ||
840+ x264_sync_frame_list_init( &pool->run, pool->threads ) ||
841+ x264_sync_frame_list_init( &pool->done, pool->threads ) )
842+ goto fail;
843+
844+ for( int i = 0; i < pool->threads; i++ ) /* pre-allocate one job slot per worker */
845+ {
846+ x264_threadpool_job_t *job;
847+ CHECKED_MALLOC( job, sizeof(x264_threadpool_job_t) );
848+ x264_sync_frame_list_push( &pool->uninit, (void*)job );
849+ }
850+ for( int i = 0; i < pool->threads; i++ ) /* start the workers */
851+ if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) )
852+ goto fail;
853+
854+ return 0;
855+fail: /* presumably also the target of the CHECKED_MALLOC* macros. NOTE(review): nothing allocated above is freed here and *p_pool stays set - confirm callers cope */
856+ return -1;
857+}
858+
859+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ) /* queue func( arg ) for execution by a worker */
860+{
861+ x264_threadpool_job_t *job = (void*)x264_sync_frame_list_pop( &pool->uninit ); /* take a free job slot (presumably blocks while none is free - confirm) */
862+ job->func = func;
863+ job->arg = arg; /* arg is also the key x264_threadpool_wait() uses to find this job */
864+ x264_sync_frame_list_push( &pool->run, (void*)job );
865+}
866+
867+/* Block until the job that was queued with `arg` has finished, hand its job
868+ * slot back to the uninit list and return the value the job function returned. */
869+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg )
870+{
871+    x264_threadpool_job_t *job = NULL;
872+    x264_pthread_mutex_lock( &pool->done.mutex );
873+    while( !job )
874+    {
875+        /* Stop at the first match: x264_frame_shift is expected to move the later entries down,
876+         * so scanning on would skip one and a second match would overwrite (and lose) job. */
877+        for( int i = 0; !job && i < pool->done.i_size; i++ )
878+            if( ((x264_threadpool_job_t*)pool->done.list[i])->arg == arg )
879+            {
880+                job = (void*)x264_frame_shift( pool->done.list+i );
881+                pool->done.i_size--;
882+            }
883+        if( !job )
884+            x264_pthread_cond_wait( &pool->done.cv_fill, &pool->done.mutex );
885+    }
886+    x264_pthread_mutex_unlock( &pool->done.mutex );
887+
888+    void *ret = job->ret; /* read the result before the slot is recycled */
889+    x264_sync_frame_list_push( &pool->uninit, (void*)job );
890+    return ret;
891+}
892+
893+static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist ) /* free every entry held in slist, then the list itself */
894+{
895+ for( int i = 0; slist->list[i]; i++ ) /* stops at the first NULL entry */
896+ {
897+ x264_free( slist->list[i] );
898+ slist->list[i] = NULL; /* cleared before x264_sync_frame_list_delete() is given the list */
899+ }
900+ x264_sync_frame_list_delete( slist );
901+}
902+
903+void x264_threadpool_delete( x264_threadpool_t *pool ) /* stop the workers, join them and release the pool */
904+{
905+ x264_pthread_mutex_lock( &pool->run.mutex );
906+ pool->exit = 1; /* set under run.mutex, which workers hold when they test it before waiting */
907+ x264_pthread_cond_broadcast( &pool->run.cv_fill ); /* wake every worker that is waiting for a job */
908+ x264_pthread_mutex_unlock( &pool->run.mutex );
909+ for( int i = 0; i < pool->threads; i++ )
910+ x264_pthread_join( pool->thread_handle[i], NULL ); /* the worker's return value is not used */
911+
912+ x264_threadpool_list_delete( &pool->uninit );
913+ x264_threadpool_list_delete( &pool->run ); /* jobs still queued here are freed without being run */
914+ x264_threadpool_list_delete( &pool->done );
915+ x264_free( pool->thread_handle );
916+ x264_free( pool );
917+}
918diff --git a/common/threadpool.h b/common/threadpool.h
919new file mode 100644
920index 0000000..519737c
921--- /dev/null
922+++ b/common/threadpool.h
923@@ -0,0 +1,39 @@
924+/*****************************************************************************
925+ * threadpool.h: x264 threadpool module
926+ *****************************************************************************
927+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
928+ *
929+ * This program is free software; you can redistribute it and/or modify
930+ * it under the terms of the GNU General Public License as published by
931+ * the Free Software Foundation; either version 2 of the License, or
932+ * (at your option) any later version.
933+ *
934+ * This program is distributed in the hope that it will be useful,
935+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
936+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
937+ * GNU General Public License for more details.
938+ *
939+ * You should have received a copy of the GNU General Public License
940+ * along with this program; if not, write to the Free Software
941+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
942+ *****************************************************************************/
943+
944+#ifndef X264_THREADPOOL_H
945+#define X264_THREADPOOL_H
946+
947+typedef struct x264_threadpool_t x264_threadpool_t;
948+
949+#if HAVE_PTHREAD
950+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
951+ void (*init_func)(void *), void *init_arg ); /* 0 on success, -1 on failure */
952+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ); /* queue func( arg ) for a worker */
953+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ); /* wait for the job queued with arg; returns func's result */
954+void x264_threadpool_delete( x264_threadpool_t *pool ); /* stop the workers and free the pool */
955+#else /* no pthreads: init always evaluates to -1 (failure), wait to NULL, run/delete expand to nothing */
956+#define x264_threadpool_init(p,t,f,a) -1
957+#define x264_threadpool_run(p,f,a)
958+#define x264_threadpool_wait(p,a) NULL
959+#define x264_threadpool_delete(p)
960+#endif
961+
962+#endif
963diff --git a/encoder/encoder.c b/encoder/encoder.c
964index aebb536..283783d 100644
965--- a/encoder/encoder.c
966+++ b/encoder/encoder.c
967@@ -349,6 +349,20 @@ fail:
968 return -1;
969 }
970
971+#if HAVE_PTHREAD
972+static void x264_encoder_thread_init( x264_t *h ) /* per-thread setup hook handed to x264_threadpool_init() */
973+{
974+ if( h->param.i_sync_lookahead )
975+ x264_lower_thread_priority( 10 );
976+
977+#if HAVE_MMX
978+ /* Misalign mask has to be set separately for each thread. */
979+ if( h->param.cpu&X264_CPU_SSE_MISALIGN )
980+ x264_cpu_mask_misalign_sse();
981+#endif
982+}
983+#endif
984+
985 /****************************************************************************
986 *
987 ****************************************************************************
988@@ -1052,6 +1066,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
989 CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
990 h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
991
992+ if( h->param.i_threads > 1 &&
993+ x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
994+ goto fail;
995+
996 h->thread[0] = h;
997 for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
998 CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
999@@ -2044,14 +2062,6 @@ static void *x264_slices_write( x264_t *h )
1000 {
1001 int i_slice_num = 0;
1002 int last_thread_mb = h->sh.i_last_mb;
1003- if( h->param.i_sync_lookahead )
1004- x264_lower_thread_priority( 10 );
1005-
1006-#if HAVE_MMX
1007- /* Misalign mask has to be set separately for each thread. */
1008- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
1009- x264_cpu_mask_misalign_sse();
1010-#endif
1011
1012 #if HAVE_VISUALIZE
1013 if( h->param.b_visualize )
1014@@ -2093,11 +2103,6 @@ static void *x264_slices_write( x264_t *h )
1015
1016 static int x264_threaded_slices_write( x264_t *h )
1017 {
1018- void *ret = NULL;
1019-#if HAVE_MMX
1020- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
1021- x264_cpu_mask_misalign_sse();
1022-#endif
1023 /* set first/last mb and sync contexts */
1024 for( int i = 0; i < h->param.i_threads; i++ )
1025 {
1026@@ -2121,16 +2126,14 @@ static int x264_threaded_slices_write( x264_t *h )
1027 /* dispatch */
1028 for( int i = 0; i < h->param.i_threads; i++ )
1029 {
1030- if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
1031- return -1;
1032+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
1033 h->thread[i]->b_thread_active = 1;
1034 }
1035 for( int i = 0; i < h->param.i_threads; i++ )
1036 {
1037- x264_pthread_join( h->thread[i]->thread_handle, &ret );
1038 h->thread[i]->b_thread_active = 0;
1039- if( (intptr_t)ret )
1040- return (intptr_t)ret;
1041+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
1042+ return -1;
1043 }
1044
1045 /* Go back and fix up the hpel on the borders between slices. */
1046@@ -2206,6 +2209,10 @@ int x264_encoder_encode( x264_t *h,
1047 thread_current =
1048 thread_oldest = h;
1049 }
1050+#if HAVE_MMX
1051+ if( h->i_thread_frames == 1 && h->param.cpu&X264_CPU_SSE_MISALIGN )
1052+ x264_cpu_mask_misalign_sse();
1053+#endif
1054
1055 // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
1056 if( x264_reference_update( h ) )
1057@@ -2529,8 +2536,7 @@ int x264_encoder_encode( x264_t *h,
1058 h->i_threadslice_end = h->mb.i_mb_height;
1059 if( h->i_thread_frames > 1 )
1060 {
1061- if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
1062- return -1;
1063+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
1064 h->b_thread_active = 1;
1065 }
1066 else if( h->param.b_sliced_threads )
1067@@ -2553,11 +2559,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
1068
1069 if( h->b_thread_active )
1070 {
1071- void *ret = NULL;
1072- x264_pthread_join( h->thread_handle, &ret );
1073 h->b_thread_active = 0;
1074- if( (intptr_t)ret )
1075- return (intptr_t)ret;
1076+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
1077+ return -1;
1078 }
1079 if( !h->out.i_nal )
1080 {
1081@@ -2821,25 +2825,20 @@ void x264_encoder_close ( x264_t *h )
1082 x264_lookahead_delete( h );
1083
1084 if( h->param.i_threads > 1 )
1085+ x264_threadpool_delete( h->threadpool );
1086+ if( h->i_thread_frames > 1 )
1087 {
1088- // don't strictly have to wait for the other threads, but it's simpler than canceling them
1089- for( int i = 0; i < h->param.i_threads; i++ )
1090+ for( int i = 0; i < h->i_thread_frames; i++ )
1091 if( h->thread[i]->b_thread_active )
1092- x264_pthread_join( h->thread[i]->thread_handle, NULL );
1093- if( h->i_thread_frames > 1 )
1094- {
1095- for( int i = 0; i < h->i_thread_frames; i++ )
1096- if( h->thread[i]->b_thread_active )
1097- {
1098- assert( h->thread[i]->fenc->i_reference_count == 1 );
1099- x264_frame_delete( h->thread[i]->fenc );
1100- }
1101+ {
1102+ assert( h->thread[i]->fenc->i_reference_count == 1 );
1103+ x264_frame_delete( h->thread[i]->fenc );
1104+ }
1105
1106- x264_t *thread_prev = h->thread[h->i_thread_phase];
1107- x264_thread_sync_ratecontrol( h, thread_prev, h );
1108- x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
1109- h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
1110- }
1111+ x264_t *thread_prev = h->thread[h->i_thread_phase];
1112+ x264_thread_sync_ratecontrol( h, thread_prev, h );
1113+ x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
1114+ h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
1115 }
1116 h->i_frame++;
1117
1118diff --git a/encoder/lookahead.c b/encoder/lookahead.c
1119index a79d4b1..f0af216 100644
1120--- a/encoder/lookahead.c
1121+++ b/encoder/lookahead.c
1122@@ -37,7 +37,7 @@
1123 #include "common/common.h"
1124 #include "analyse.h"
1125
1126-static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
1127+static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
1128 {
1129 int i = count;
1130 while( i-- )
1131@@ -137,9 +137,9 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
1132 look->i_slicetype_length = i_slicetype_length;
1133
1134 /* init frame lists */
1135- if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
1136- x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
1137- x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
1138+ if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
1139+ x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
1140+ x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
1141 goto fail;
1142
1143 if( !h->param.i_sync_lookahead )
1144@@ -153,7 +153,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
1145 if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
1146 goto fail;
1147
1148- if( x264_pthread_create( &look_h->thread_handle, NULL, (void *)x264_lookahead_thread, look_h ) )
1149+ if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
1150 goto fail;
1151 look->b_thread_active = 1;
1152
1153@@ -171,25 +171,25 @@ void x264_lookahead_delete( x264_t *h )
1154 h->lookahead->b_exit_thread = 1;
1155 x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
1156 x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
1157- x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
1158+ x264_pthread_join( h->lookahead->thread_handle, NULL );
1159 x264_macroblock_cache_free( h->thread[h->param.i_threads] );
1160 x264_macroblock_thread_free( h->thread[h->param.i_threads], 1 );
1161 x264_free( h->thread[h->param.i_threads] );
1162 }
1163- x264_synch_frame_list_delete( &h->lookahead->ifbuf );
1164- x264_synch_frame_list_delete( &h->lookahead->next );
1165+ x264_sync_frame_list_delete( &h->lookahead->ifbuf );
1166+ x264_sync_frame_list_delete( &h->lookahead->next );
1167 if( h->lookahead->last_nonb )
1168 x264_frame_push_unused( h, h->lookahead->last_nonb );
1169- x264_synch_frame_list_delete( &h->lookahead->ofbuf );
1170+ x264_sync_frame_list_delete( &h->lookahead->ofbuf );
1171 x264_free( h->lookahead );
1172 }
1173
1174 void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
1175 {
1176 if( h->param.i_sync_lookahead )
1177- x264_synch_frame_list_push( &h->lookahead->ifbuf, frame );
1178+ x264_sync_frame_list_push( &h->lookahead->ifbuf, frame );
1179 else
1180- x264_synch_frame_list_push( &h->lookahead->next, frame );
1181+ x264_sync_frame_list_push( &h->lookahead->next, frame );
1182 }
1183
1184 int x264_lookahead_is_empty( x264_t *h )
1185diff --git a/input/thread.c b/input/thread.c
1186index a88cfae..c4b07fa 100644
1187--- a/input/thread.c
1188+++ b/input/thread.c
1189@@ -30,10 +30,9 @@ typedef struct
1190 cli_input_t input;
1191 hnd_t p_handle;
1192 x264_picture_t pic;
1193- x264_pthread_t tid;
1194+ x264_threadpool_t *pool;
1195 int next_frame;
1196 int frame_total;
1197- int in_progress;
1198 struct thread_input_arg_t *next_args;
1199 } thread_hnd_t;
1200
1201@@ -55,7 +54,6 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1202 }
1203 h->input = input;
1204 h->p_handle = *p_handle;
1205- h->in_progress = 0;
1206 h->next_frame = -1;
1207 h->next_args = malloc( sizeof(thread_input_arg_t) );
1208 if( !h->next_args )
1209@@ -66,6 +64,9 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1210 thread_input.picture_alloc = h->input.picture_alloc;
1211 thread_input.picture_clean = h->input.picture_clean;
1212
1213+ if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
1214+ return -1;
1215+
1216 *p_handle = h;
1217 return 0;
1218 }
1219@@ -88,9 +89,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1220
1221 if( h->next_frame >= 0 )
1222 {
1223- x264_pthread_join( h->tid, NULL );
1224+ x264_threadpool_wait( h->pool, h->next_args );
1225 ret |= h->next_args->status;
1226- h->in_progress = 0;
1227 }
1228
1229 if( h->next_frame == i_frame )
1230@@ -103,9 +103,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
1231 h->next_frame =
1232 h->next_args->i_frame = i_frame+1;
1233 h->next_args->pic = &h->pic;
1234- if( x264_pthread_create( &h->tid, NULL, (void*)read_frame_thread_int, h->next_args ) )
1235- return -1;
1236- h->in_progress = 1;
1237+ x264_threadpool_run( h->pool, (void*)read_frame_thread_int, h->next_args );
1238 }
1239 else
1240 h->next_frame = -1;
1241@@ -124,8 +122,7 @@ static int release_frame( x264_picture_t *pic, hnd_t handle )
1242 static int close_file( hnd_t handle )
1243 {
1244 thread_hnd_t *h = handle;
1245- if( h->in_progress )
1246- x264_pthread_join( h->tid, NULL );
1247+ x264_threadpool_delete( h->pool );
1248 h->input.close_file( h->p_handle );
1249 h->input.picture_clean( &h->pic );
1250 free( h->next_args );
1251--
12521.7.0.4
1253
1254
1255From 72a4e9be276c6a9700a108d84c4072dc3ba00317 Mon Sep 17 00:00:00 2001
1256From: Lamont Alston <wewk584@gmail.com>
1257Date: Wed, 16 Jun 2010 10:05:17 -0700
1258Subject: [PATCH 08/10] Add open-GOP support
1259
1260---
1261 common/common.c | 13 +++++++++-
1262 common/common.h | 6 ++++-
1263 encoder/encoder.c | 45 +++++++++++++++++++++++++-------------
1264 encoder/ratecontrol.c | 1 +
1265 encoder/slicetype.c | 57 +++++++++++++++++++++++++++++++++++++------------
1266 x264.c | 11 ++++++++-
1267 x264.h | 8 ++++++-
1268 7 files changed, 107 insertions(+), 34 deletions(-)
1269
1270diff --git a/common/common.c b/common/common.c
1271index cb1aa9c..d61d82a 100644
1272--- a/common/common.c
1273+++ b/common/common.c
1274@@ -676,6 +676,15 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
1275 p->i_bframe_pyramid = atoi(value);
1276 }
1277 }
1278+ OPT("open-gop")
1279+ {
1280+ b_error |= parse_enum( value, x264_open_gop_names, &p->i_open_gop );
1281+ if( b_error )
1282+ {
1283+ b_error = 0;
1284+ p->i_open_gop = atoi(value);
1285+ }
1286+ }
1287 OPT("nf")
1288 p->b_deblocking_filter = !atobool(value);
1289 OPT2("filter", "deblock")
1290@@ -1190,9 +1199,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
1291 s += sprintf( s, " bframes=%d", p->i_bframe );
1292 if( p->i_bframe )
1293 {
1294- s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d",
1295+ s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
1296 p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
1297- p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
1298+ p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->i_open_gop );
1299 }
1300 s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
1301
1302diff --git a/common/common.h b/common/common.h
1303index 60899fe..dfa1121 100644
1304--- a/common/common.h
1305+++ b/common/common.h
1306@@ -471,7 +471,11 @@ struct x264_t
1307 /* frames used for reference + sentinels */
1308 x264_frame_t *reference[16+2];
1309
1310- int i_last_keyframe; /* Frame number of the last keyframe */
1311+ int i_last_keyframe; /* Frame number of the last keyframe */
1312+ int i_last_idr; /* Frame number of the last IDR (not RP)*/
1313+ int i_poc_last_open_gop; /* Poc of the I frame of the last open-gop. The value
1314+ * is only assigned during the period between that
1315+ * I frame and the next P or I frame, else -1 */
1316
1317 int i_input; /* Number of input frames already accepted */
1318
1319diff --git a/encoder/encoder.c b/encoder/encoder.c
1320index 283783d..aa84192 100644
1321--- a/encoder/encoder.c
1322+++ b/encoder/encoder.c
1323@@ -573,12 +573,10 @@ static int x264_validate_parameters( x264_t *h )
1324 x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
1325 h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
1326 }
1327- h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
1328+ h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
1329+ h->param.i_open_gop = x264_clip3( h->param.i_open_gop, X264_OPEN_GOP_NONE, X264_OPEN_GOP_CODED_ORDER );
1330 if( h->param.i_keyint_max == 1 )
1331- {
1332- h->param.i_bframe = 0;
1333 h->param.b_intra_refresh = 0;
1334- }
1335 h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
1336 if( h->param.i_bframe <= 1 )
1337 h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
1338@@ -588,6 +586,7 @@ static int x264_validate_parameters( x264_t *h )
1339 h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
1340 h->param.analyse.i_direct_mv_pred = 0;
1341 h->param.analyse.b_weighted_bipred = 0;
1342+ h->param.i_open_gop = X264_OPEN_GOP_NONE;
1343 }
1344 if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL )
1345 {
1346@@ -599,6 +598,11 @@ static int x264_validate_parameters( x264_t *h )
1347 x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
1348 h->param.i_frame_reference = 1;
1349 }
1350+ if( h->param.b_intra_refresh && h->param.i_open_gop )
1351+ {
1352+ x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" );
1353+ h->param.i_open_gop = X264_OPEN_GOP_NONE;
1354+ }
1355 if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO )
1356 h->param.i_keyint_min = h->param.i_keyint_max / 10;
1357 h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
1358@@ -978,9 +982,11 @@ x264_t *x264_encoder_open( x264_param_t *param )
1359 h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
1360 h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
1361
1362+ h->frames.i_last_idr =
1363 h->frames.i_last_keyframe = - h->param.i_keyint_max;
1364 h->frames.i_input = 0;
1365 h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
1366+ h->frames.i_poc_last_open_gop = -1;
1367
1368 CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
1369 /* Allocate room for max refs plus a few extra just in case. */
1370@@ -1688,35 +1694,37 @@ static inline void x264_reference_hierarchy_reset( x264_t *h )
1371 {
1372 int ref;
1373 int b_hasdelayframe = 0;
1374- if( !h->param.i_bframe_pyramid )
1375- return;
1376
1377 /* look for delay frames -- chain must only contain frames that are disposable */
1378 for( int i = 0; h->frames.current[i] && IS_DISPOSABLE( h->frames.current[i]->i_type ); i++ )
1379 b_hasdelayframe |= h->frames.current[i]->i_coded
1380 != h->frames.current[i]->i_frame + h->sps->vui.i_num_reorder_frames;
1381
1382- if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe )
1383+ /* This function must handle b-pyramid and clear frames for open-gop */
1384+ if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe && h->frames.i_poc_last_open_gop == -1 )
1385 return;
1386
1387 /* Remove last BREF. There will never be old BREFs in the
1388 * dpb during a BREF decode when pyramid == STRICT */
1389 for( ref = 0; h->frames.reference[ref]; ref++ )
1390 {
1391- if( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
1392+ if( ( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
1393 && h->frames.reference[ref]->i_type == X264_TYPE_BREF )
1394+ || ( h->frames.reference[ref]->i_poc < h->frames.i_poc_last_open_gop
1395+ && h->sh.i_type != SLICE_TYPE_B ) )
1396 {
1397 int diff = h->i_frame_num - h->frames.reference[ref]->i_frame_num;
1398 h->sh.mmco[h->sh.i_mmco_command_count].i_difference_of_pic_nums = diff;
1399 h->sh.mmco[h->sh.i_mmco_command_count++].i_poc = h->frames.reference[ref]->i_poc;
1400- x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) );
1401+ x264_frame_push_unused( h, x264_frame_shift( &h->frames.reference[ref] ) );
1402 h->b_ref_reorder[0] = 1;
1403- break;
1404+ ref--;
1405 }
1406 }
1407
1408- /* Prepare to room in the dpb for the delayed display time of the later b-frame's */
1409- h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
1410+ /* Prepare room in the dpb for the delayed display time of the later b-frame's */
1411+ if( h->param.i_bframe_pyramid )
1412+ h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
1413 }
1414
1415 static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
1416@@ -2321,12 +2329,17 @@ int x264_encoder_encode( x264_t *h,
1417 {
1418 h->frames.i_last_keyframe = h->fenc->i_frame;
1419 if( h->fenc->i_type == X264_TYPE_IDR )
1420+ {
1421 h->i_frame_num = 0;
1422+ h->frames.i_last_idr = h->fenc->i_frame;
1423+ }
1424 }
1425 h->sh.i_mmco_command_count =
1426 h->sh.i_mmco_remove_from_end = 0;
1427 h->b_ref_reorder[0] =
1428 h->b_ref_reorder[1] = 0;
1429+ h->fdec->i_poc =
1430+ h->fenc->i_poc = 2 * ( h->fenc->i_frame - X264_MAX( h->frames.i_last_idr, 0 ) );
1431
1432 /* ------------------- Setup frame context ----------------------------- */
1433 /* 5: Init data dependent of frame type */
1434@@ -2337,6 +2350,7 @@ int x264_encoder_encode( x264_t *h,
1435 i_nal_ref_idc = NAL_PRIORITY_HIGHEST;
1436 h->sh.i_type = SLICE_TYPE_I;
1437 x264_reference_reset( h );
1438+ h->frames.i_poc_last_open_gop = -1;
1439 }
1440 else if( h->fenc->i_type == X264_TYPE_I )
1441 {
1442@@ -2344,6 +2358,8 @@ int x264_encoder_encode( x264_t *h,
1443 i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
1444 h->sh.i_type = SLICE_TYPE_I;
1445 x264_reference_hierarchy_reset( h );
1446+ if( h->param.i_open_gop )
1447+ h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
1448 }
1449 else if( h->fenc->i_type == X264_TYPE_P )
1450 {
1451@@ -2351,6 +2367,7 @@ int x264_encoder_encode( x264_t *h,
1452 i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
1453 h->sh.i_type = SLICE_TYPE_P;
1454 x264_reference_hierarchy_reset( h );
1455+ h->frames.i_poc_last_open_gop = -1;
1456 }
1457 else if( h->fenc->i_type == X264_TYPE_BREF )
1458 {
1459@@ -2366,8 +2383,6 @@ int x264_encoder_encode( x264_t *h,
1460 h->sh.i_type = SLICE_TYPE_B;
1461 }
1462
1463- h->fdec->i_poc =
1464- h->fenc->i_poc = 2 * (h->fenc->i_frame - h->frames.i_last_keyframe);
1465 h->fdec->i_type = h->fenc->i_type;
1466 h->fdec->i_frame = h->fenc->i_frame;
1467 h->fenc->b_kept_as_ref =
1468@@ -2484,7 +2499,7 @@ int x264_encoder_encode( x264_t *h,
1469
1470 if( h->fenc->i_type != X264_TYPE_IDR )
1471 {
1472- int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
1473+ int time_to_recovery = h->param.i_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
1474 x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
1475 x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
1476 x264_nal_end( h );
1477diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
1478index 16afbf0..1030ef2 100644
1479--- a/encoder/ratecontrol.c
1480+++ b/encoder/ratecontrol.c
1481@@ -724,6 +724,7 @@ int x264_ratecontrol_new( x264_t *h )
1482 CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
1483 CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
1484 CMP_OPT_FIRST_PASS( "keyint", h->param.i_keyint_max );
1485+ CMP_OPT_FIRST_PASS( "open_gop", h->param.i_open_gop );
1486
1487 if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
1488 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
1489diff --git a/encoder/slicetype.c b/encoder/slicetype.c
1490index 60f3a24..0a2514d 100644
1491--- a/encoder/slicetype.c
1492+++ b/encoder/slicetype.c
1493@@ -1058,7 +1058,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1494 {
1495 x264_mb_analysis_t a;
1496 x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
1497- int num_frames, orig_num_frames, keyint_limit, idr_frame_type, framecnt;
1498+ int num_frames, orig_num_frames, keyint_limit, framecnt;
1499 int i_mb_count = NUM_MBS;
1500 int cost1p0, cost2p0, cost1b1, cost2p1;
1501 int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
1502@@ -1080,7 +1080,6 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1503 orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
1504
1505 x264_lowres_context_init( h, &a );
1506- idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
1507
1508 /* This is important psy-wise: if we have a non-scenecut keyframe,
1509 * there will be significant visual artifacts if the frames just before
1510@@ -1092,12 +1091,12 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1511 {
1512 frames[1]->i_type = X264_TYPE_P;
1513 if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
1514- frames[1]->i_type = idr_frame_type;
1515+ frames[1]->i_type = X264_TYPE_I;
1516 return;
1517 }
1518 else if( num_frames == 0 )
1519 {
1520- frames[1]->i_type = idr_frame_type;
1521+ frames[1]->i_type = X264_TYPE_I;
1522 return;
1523 }
1524
1525@@ -1106,7 +1105,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1526 int reset_start;
1527 if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
1528 {
1529- frames[1]->i_type = idr_frame_type;
1530+ frames[1]->i_type = X264_TYPE_I;
1531 return;
1532 }
1533
1534@@ -1210,15 +1209,19 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1535
1536 /* Enforce keyframe limit. */
1537 if( !h->param.b_intra_refresh )
1538- for( int j = 0; j < num_frames; j++ )
1539+ for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
1540 {
1541- if( ((j-keyint_limit) % h->param.i_keyint_max) == 0 )
1542+ int j = i;
1543+ if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
1544 {
1545- if( j && h->param.i_keyint_max > 1 )
1546- frames[j]->i_type = X264_TYPE_P;
1547- frames[j+1]->i_type = X264_TYPE_IDR;
1548- reset_start = X264_MIN( reset_start, j+2 );
1549+ while( IS_X264_TYPE_B( frames[i]->i_type ) )
1550+ i++;
1551+ while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
1552+ j--;
1553 }
1554+ frames[i]->i_type = X264_TYPE_I;
1555+ reset_start = X264_MIN( reset_start, i+1 );
1556+ i = j;
1557 }
1558
1559 if( h->param.rc.i_vbv_buffer_size )
1560@@ -1303,13 +1306,39 @@ void x264_slicetype_decide( x264_t *h )
1561 frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference );
1562 }
1563
1564+ if( frm->i_type == X264_TYPE_KEYFRAME )
1565+ frm->i_type = h->param.i_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
1566+
1567 /* Limit GOP size */
1568 if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
1569 {
1570- if( frm->i_type == X264_TYPE_AUTO )
1571+ if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
1572+ frm->i_type = h->param.i_open_gop && h->lookahead->i_last_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
1573+ int warn = frm->i_type != X264_TYPE_IDR;
1574+ if( warn && h->param.i_open_gop == X264_OPEN_GOP_DISPLAY_ORDER )
1575+ warn &= frm->i_type != X264_TYPE_I;
1576+ if( warn && h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
1577+ {
1578+ /* if this minigop ends with i, it's not a violation */
1579+ int j = bframes;
1580+ while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
1581+ j++;
1582+ warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
1583+ }
1584+ if( warn )
1585+ x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
1586+ }
1587+ if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min )
1588+ {
1589+ if( h->param.i_open_gop )
1590+ {
1591+ h->lookahead->i_last_keyframe = frm->i_frame; // Use display order
1592+ if( h->param.i_open_gop == X264_OPEN_GOP_CODED_ORDER )
1593+ h->lookahead->i_last_keyframe -= bframes; // Use coded order
1594+ frm->b_keyframe = 1;
1595+ }
1596+ else
1597 frm->i_type = X264_TYPE_IDR;
1598- if( frm->i_type != X264_TYPE_IDR )
1599- x264_log( h, X264_LOG_WARNING, "specified frame type (%d) is not compatible with keyframe interval\n", frm->i_type );
1600 }
1601 if( frm->i_type == X264_TYPE_IDR )
1602 {
1603diff --git a/x264.c b/x264.c
1604index 09bad61..fa73b5c 100644
1605--- a/x264.c
1606+++ b/x264.c
1607@@ -380,6 +380,12 @@ static void Help( x264_param_t *defaults, int longhelp )
1608 " - strict: Strictly hierarchical pyramid\n"
1609 " - normal: Non-strict (not Blu-ray compatible)\n",
1610 strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
1611+ H1( " --open-gop <string> Use recovery points to close GOPs [none]\n"
1612+ " - none: Use standard closed GOPs\n"
1613+ " - display: Base GOP length on display order\n"
1614+ " - coded: Base GOP length on coded order\n"
1615+ " (Required for Blu-Ray)\n"
1616+ " Only available with b-frames\n" );
1617 H1( " --no-cabac Disable CABAC\n" );
1618 H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
1619 H1( " --no-deblock Disable loop filter\n" );
1620@@ -441,7 +447,8 @@ static void Help( x264_param_t *defaults, int longhelp )
1621 " or b=<float> (bitrate multiplier)\n" );
1622 H2( " --qpfile <string> Force frametypes and QPs for some or all frames\n"
1623 " Format of each line: framenumber frametype QP\n"
1624- " QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n"
1625+ " QP of -1 lets x264 choose. Frametypes: I,i,K,P,B,b.\n"
1626+ " K=<I or i> depending on open-gop setting\n"
1627 " QPs are restricted by qpmin/qpmax.\n" );
1628 H1( "\n" );
1629 H1( "Analysis:\n" );
1630@@ -627,6 +634,7 @@ static struct option long_options[] =
1631 { "no-b-adapt", no_argument, NULL, 0 },
1632 { "b-bias", required_argument, NULL, 0 },
1633 { "b-pyramid", required_argument, NULL, 0 },
1634+ { "open-gop", required_argument, NULL, 0 },
1635 { "min-keyint", required_argument, NULL, 'i' },
1636 { "keyint", required_argument, NULL, 'I' },
1637 { "intra-refresh", no_argument, NULL, 0 },
1638@@ -1305,6 +1313,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
1639 pic->i_qpplus1 = qp+1;
1640 if ( type == 'I' ) pic->i_type = X264_TYPE_IDR;
1641 else if( type == 'i' ) pic->i_type = X264_TYPE_I;
1642+ else if( type == 'K' ) pic->i_type = X264_TYPE_KEYFRAME;
1643 else if( type == 'P' ) pic->i_type = X264_TYPE_P;
1644 else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
1645 else if( type == 'b' ) pic->i_type = X264_TYPE_B;
1646diff --git a/x264.h b/x264.h
1647index 9cd4600..09183fd 100644
1648--- a/x264.h
1649+++ b/x264.h
1650@@ -35,7 +35,7 @@
1651
1652 #include <stdarg.h>
1653
1654-#define X264_BUILD 98
1655+#define X264_BUILD 99
1656
1657 /* x264_t:
1658 * opaque handler for encoder */
1659@@ -104,6 +104,9 @@ typedef struct x264_t x264_t;
1660 #define X264_B_PYRAMID_STRICT 1
1661 #define X264_B_PYRAMID_NORMAL 2
1662 #define X264_KEYINT_MIN_AUTO 0
1663+#define X264_OPEN_GOP_NONE 0
1664+#define X264_OPEN_GOP_DISPLAY_ORDER 1
1665+#define X264_OPEN_GOP_CODED_ORDER 2
1666
1667 static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
1668 static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
1669@@ -115,6 +118,7 @@ static const char * const x264_colorprim_names[] = { "", "bt709", "undef", "", "
1670 static const char * const x264_transfer_names[] = { "", "bt709", "undef", "", "bt470m", "bt470bg", "smpte170m", "smpte240m", "linear", "log100", "log316", 0 };
1671 static const char * const x264_colmatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m", "smpte240m", "YCgCo", 0 };
1672 static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
1673+static const char * const x264_open_gop_names[] = { "none", "display", "coded", 0 };
1674
1675 /* Colorspace type
1676 * legacy only; nothing other than I420 is really supported. */
1677@@ -138,6 +142,7 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
1678 #define X264_TYPE_P 0x0003
1679 #define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */
1680 #define X264_TYPE_B 0x0005
1681+#define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on i_open_gop option */
1682 #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR)
1683 #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
1684
1685@@ -221,6 +226,7 @@ typedef struct x264_param_t
1686 int i_bframe_adaptive;
1687 int i_bframe_bias;
1688 int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
1689+ int i_open_gop; /* Open GOP: 0=none, 1=display order, 2=coded order (the order used to determine GOP size) */
1690
1691 int b_deblocking_filter;
1692 int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
1693--
16941.7.0.4
1695
1696
1697From ebafd665ef992e06623a79889ddccc29693beb7f Mon Sep 17 00:00:00 2001
1698From: Jason Garrett-Glaser <darkshikari@gmail.com>
1699Date: Thu, 17 Jun 2010 14:50:07 -0700
1700Subject: [PATCH 09/10] Lookaheadless MB-tree support
1701 Uses past motion information instead of future data from the lookahead.
1702 Not as accurate, but better than nothing in zero-latency compression when a lookahead isn't available.
1703 Currently resets on keyframes, so only available if intra-refresh is set, to avoid pops on non-scenecut keyframes.
1704
1705Also slightly modify encoding presets: disable rc-lookahead in the fastest presets.
1706Enable MB-tree in "veryfast", albeit with a very short lookahead.
1707---
1708 common/common.c | 5 ++++-
1709 encoder/encoder.c | 7 ++++++-
1710 encoder/slicetype.c | 48 ++++++++++++++++++++++++++++++++++--------------
1711 x264.c | 14 +++++++-------
1712 4 files changed, 51 insertions(+), 23 deletions(-)
1713
1714diff --git a/common/common.c b/common/common.c
1715index d61d82a..5095ce8 100644
1716--- a/common/common.c
1717+++ b/common/common.c
1718@@ -184,6 +184,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1719 param->rc.b_mb_tree = 0;
1720 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1721 param->analyse.b_weighted_bipred = 0;
1722+ param->rc.i_lookahead = 0;
1723 }
1724 else if( !strcasecmp( preset, "superfast" ) )
1725 {
1726@@ -195,6 +196,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1727 param->analyse.i_trellis = 0;
1728 param->rc.b_mb_tree = 0;
1729 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1730+ param->rc.i_lookahead = 0;
1731 }
1732 else if( !strcasecmp( preset, "veryfast" ) )
1733 {
1734@@ -203,8 +205,8 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1735 param->i_frame_reference = 1;
1736 param->analyse.b_mixed_references = 0;
1737 param->analyse.i_trellis = 0;
1738- param->rc.b_mb_tree = 0;
1739 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1740+ param->rc.i_lookahead = 10;
1741 }
1742 else if( !strcasecmp( preset, "faster" ) )
1743 {
1744@@ -355,6 +357,7 @@ static int x264_param_apply_tune( x264_param_t *param, const char *tune )
1745 param->i_bframe = 0;
1746 param->b_sliced_threads = 1;
1747 param->b_vfr_input = 0;
1748+ param->rc.b_mb_tree = 0;
1749 }
1750 else if( !strncasecmp( s, "touhou", 6 ) )
1751 {
1752diff --git a/encoder/encoder.c b/encoder/encoder.c
1753index aa84192..b4c63c9 100644
1754--- a/encoder/encoder.c
1755+++ b/encoder/encoder.c
1756@@ -621,8 +621,13 @@ static int x264_validate_parameters( x264_t *h )
1757 }
1758
1759 h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
1760- if( !h->param.rc.i_lookahead || h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
1761+ if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
1762 h->param.rc.b_mb_tree = 0;
1763+ if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
1764+ {
1765+ x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
1766+ h->param.rc.b_mb_tree = 0;
1767+ }
1768 if( h->param.rc.b_stat_read )
1769 h->param.rc.i_lookahead = 0;
1770 #if HAVE_PTHREAD
1771diff --git a/encoder/slicetype.c b/encoder/slicetype.c
1772index 0a2514d..674b298 100644
1773--- a/encoder/slicetype.c
1774+++ b/encoder/slicetype.c
1775@@ -734,7 +734,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
1776 }
1777 }
1778
1779- if( h->param.rc.i_vbv_buffer_size && referenced )
1780+ if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
1781 x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
1782 }
1783
1784@@ -743,7 +743,8 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1785 int idx = !b_intra;
1786 int last_nonb, cur_nonb = 1;
1787 int bframes = 0;
1788- int i = num_frames - 1;
1789+ int i = num_frames;
1790+
1791 if( b_intra )
1792 x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
1793
1794@@ -751,10 +752,25 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1795 i--;
1796 last_nonb = i;
1797
1798- if( last_nonb < idx )
1799- return;
1800
1801- memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1802+ if( !h->param.rc.i_lookahead )
1803+ {
1804+ if( b_intra )
1805+ {
1806+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1807+ memcpy( frames[0]->f_qp_offset, frames[0]->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
1808+ return;
1809+ }
1810+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
1811+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1812+ }
1813+ else
1814+ {
1815+ if( last_nonb < idx )
1816+ return;
1817+ memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1818+ }
1819+
1820 while( i-- > idx )
1821 {
1822 cur_nonb = i;
1823@@ -796,6 +812,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1824 last_nonb = cur_nonb;
1825 }
1826
1827+ if( !h->param.rc.i_lookahead )
1828+ {
1829+ x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
1830+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
1831+ }
1832+
1833 x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
1834 if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
1835 x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
1836@@ -1062,6 +1084,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1837 int i_mb_count = NUM_MBS;
1838 int cost1p0, cost2p0, cost1b1, cost2p1;
1839 int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
1840+ int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
1841 if( h->param.b_deterministic )
1842 i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + !keyframe );
1843
1844@@ -1074,7 +1097,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1845 frames[framecnt+1] = h->lookahead->next.list[framecnt];
1846
1847 if( !framecnt )
1848+ {
1849+ if( h->param.rc.b_mb_tree )
1850+ x264_macroblock_tree( h, &a, frames, 0, keyframe );
1851 return;
1852+ }
1853
1854 keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_keyframe - 1;
1855 orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
1856@@ -1085,15 +1112,8 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1857 * there will be significant visual artifacts if the frames just before
1858 * go down in quality due to being referenced less, despite it being
1859 * more RD-optimal. */
1860- if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || h->param.rc.i_vbv_buffer_size )
1861+ if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
1862 num_frames = framecnt;
1863- else if( num_frames == 1 )
1864- {
1865- frames[1]->i_type = X264_TYPE_P;
1866- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
1867- frames[1]->i_type = X264_TYPE_I;
1868- return;
1869- }
1870 else if( num_frames == 0 )
1871 {
1872 frames[1]->i_type = X264_TYPE_I;
1873@@ -1224,7 +1244,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1874 i = j;
1875 }
1876
1877- if( h->param.rc.i_vbv_buffer_size )
1878+ if( vbv_lookahead )
1879 x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
1880
1881 /* Restore frametypes for all frames that haven't actually been decided yet. */
1882diff --git a/x264.c b/x264.c
1883index fa73b5c..8d4966f 100644
1884--- a/x264.c
1885+++ b/x264.c
1886@@ -285,16 +285,16 @@ static void Help( x264_param_t *defaults, int longhelp )
1887 " --no-8x8dct --aq-mode 0 --b-adapt 0\n"
1888 " --bframes 0 --no-cabac --no-deblock\n"
1889 " --no-mbtree --me dia --no-mixed-refs\n"
1890- " --partitions none --ref 1 --scenecut 0\n"
1891- " --subme 0 --trellis 0 --no-weightb\n"
1892- " --weightp 0\n"
1893+ " --partitions none --rc-lookahead 0 --ref 1\n"
1894+ " --scenecut 0 --subme 0 --trellis 0\n"
1895+ " --no-weightb --weightp 0\n"
1896 " - superfast:\n"
1897 " --no-mbtree --me dia --no-mixed-refs\n"
1898- " --partitions i8x8,i4x4 --ref 1\n"
1899- " --subme 1 --trellis 0 --weightp 0\n"
1900+ " --partitions i8x8,i4x4 --rc-lookahead 0\n"
1901+ " --ref 1 --subme 1 --trellis 0 --weightp 0\n"
1902 " - veryfast:\n"
1903- " --no-mbtree --no-mixed-refs --ref 1\n"
1904- " --subme 2 --trellis 0 --weightp 0\n"
1905+ " --no-mixed-refs --rc-lookahead 10\n"
1906+ " --ref 1 --subme 2 --trellis 0 --weightp 0\n"
1907 " - faster:\n"
1908 " --no-mixed-refs --rc-lookahead 20\n"
1909 " --ref 2 --subme 4 --weightp 1\n"
1910--
19111.7.0.4
1912
1913
1914From 3a9a369e8ff5c767a6c084dd96c2b1aa9d1205f2 Mon Sep 17 00:00:00 2001
1915From: Jason Garrett-Glaser <darkshikari@gmail.com>
1916Date: Wed, 23 Jun 2010 17:29:34 -0700
1917Subject: [PATCH 10/10] Interactive encoder control: error resilience
1918 In low-latency streaming with few clients, it is often feasible to modify encoder behavior in some fashion based on feedback from clients.
1919 One possible application of this is error resilience: if a packet is lost, mark the associated frame (and any referenced from it) as lost.
1920 This allows quick recovery from errors with minimal expense bit-wise.
1921
1922The new i_dpb_size parameter allows a calling application to tell x264 to use a larger DPB size than required by the number of reference frames.
1923This lets x264 and the client keep a large buffer of old references to fall back to in case of lost frames.
1924If no recovery is possible even with the available buffer, x264 will force a keyframe.
1925
1926This initial version does not support B-frames or intra refresh.
1927Recommended usage is to set keyint to a very large value, so that keyframes do not occur except as necessary for extreme error recovery.
1928
1929Full documentation is in x264.h.
1930
1931Move DTS/PTS calculation to before encoding each frame instead of after.
1932Improve documentation of x264_encoder_intra_refresh.
1933---
1934 common/common.c | 2 +
1935 common/common.h | 2 +
1936 common/frame.c | 1 +
1937 common/frame.h | 4 ++
1938 common/mvpred.c | 14 +++++---
1939 encoder/encoder.c | 96 +++++++++++++++++++++++++++++++++++++++-------------
1940 encoder/set.c | 4 +-
1941 x264.h | 33 +++++++++++++++++-
1942 8 files changed, 123 insertions(+), 33 deletions(-)
1943
1944diff --git a/common/common.c b/common/common.c
1945index 5095ce8..1799795 100644
1946--- a/common/common.c
1947+++ b/common/common.c
1948@@ -634,6 +634,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
1949 }
1950 OPT2("ref", "frameref")
1951 p->i_frame_reference = atoi(value);
1952+ OPT("dpb-size")
1953+ p->i_dpb_size = atoi(value);
1954 OPT("keyint")
1955 {
1956 p->i_keyint_max = atoi(value);
1957diff --git a/common/common.h b/common/common.h
1958index dfa1121..7b60811 100644
1959--- a/common/common.h
1960+++ b/common/common.h
1961@@ -421,6 +421,8 @@ struct x264_t
1962 int i_cpb_delay_lookahead;
1963
1964 int b_queued_intra_refresh;
1965+ int64_t i_reference_invalidate_pts;
1966+ int64_t i_last_idr_pts;
1967
1968 /* We use only one SPS and one PPS */
1969 x264_sps_t sps_array[1];
1970diff --git a/common/frame.c b/common/frame.c
1971index 7c2fce0..81eac6f 100644
1972--- a/common/frame.c
1973+++ b/common/frame.c
1974@@ -443,6 +443,7 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec )
1975 frame->b_intra_calculated = 0;
1976 frame->b_scenecut = 1;
1977 frame->b_keyframe = 0;
1978+ frame->b_valid = 1;
1979
1980 memset( frame->weight, 0, sizeof(frame->weight) );
1981 memset( frame->f_weighted_cost_delta, 0, sizeof(frame->f_weighted_cost_delta) );
1982diff --git a/common/frame.h b/common/frame.h
1983index 26529ce..cb50a0b 100644
1984--- a/common/frame.h
1985+++ b/common/frame.h
1986@@ -35,6 +35,7 @@ typedef struct x264_frame
1987 int i_type;
1988 int i_qpplus1;
1989 int64_t i_pts;
1990+ int64_t i_dts;
1991 int64_t i_reordered_pts;
1992 int i_duration; /* in SPS time_scale units (i.e 2 * timebase units) used for vfr */
1993 int i_cpb_duration;
1994@@ -143,6 +144,9 @@ typedef struct x264_frame
1995 int i_pir_start_col;
1996 int i_pir_end_col;
1997 int i_frames_since_pir;
1998+
1999+ /* interactive encoder control */
2000+ int b_valid;
2001 } x264_frame_t;
2002
2003 /* synchronized frame list */
2004diff --git a/common/mvpred.c b/common/mvpred.c
2005index 10a18b3..03dfe9f 100644
2006--- a/common/mvpred.c
2007+++ b/common/mvpred.c
2008@@ -409,12 +409,16 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
2009
2010 if( i_ref == 0 && h->frames.b_have_lowres )
2011 {
2012- int16_t (*lowres_mv)[2] = i_list ? h->fenc->lowres_mvs[1][h->fref1[0]->i_frame-h->fenc->i_frame-1]
2013- : h->fenc->lowres_mvs[0][h->fenc->i_frame-h->fref0[0]->i_frame-1];
2014- if( lowres_mv[0][0] != 0x7fff )
2015+ int idx = i_list ? h->fref1[0]->i_frame-h->fenc->i_frame-1
2016+ : h->fenc->i_frame-h->fref0[0]->i_frame-1;
2017+ if( idx <= h->param.i_bframe )
2018 {
2019- M32( mvc[i] ) = (M32( lowres_mv[h->mb.i_mb_xy] )*2)&0xfffeffff;
2020- i++;
2021+ int16_t (*lowres_mv)[2] = h->fenc->lowres_mvs[i_list][idx];
2022+ if( lowres_mv[0][0] != 0x7fff )
2023+ {
2024+ M32( mvc[i] ) = (M32( lowres_mv[h->mb.i_mb_xy] )*2)&0xfffeffff;
2025+ i++;
2026+ }
2027 }
2028 }
2029
2030diff --git a/encoder/encoder.c b/encoder/encoder.c
2031index b4c63c9..acf0e10 100644
2032--- a/encoder/encoder.c
2033+++ b/encoder/encoder.c
2034@@ -564,6 +564,7 @@ static int x264_validate_parameters( x264_t *h )
2035 }
2036
2037 h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
2038+ h->param.i_dpb_size = x264_clip3( h->param.i_dpb_size, 1, 16 );
2039 if( h->param.i_keyint_max <= 0 )
2040 h->param.i_keyint_max = 1;
2041 if( h->param.i_scenecut_threshold < 0 )
2042@@ -593,10 +594,11 @@ static int x264_validate_parameters( x264_t *h )
2043 x264_log( h, X264_LOG_WARNING, "b-pyramid normal + intra-refresh is not supported\n" );
2044 h->param.i_bframe_pyramid = X264_B_PYRAMID_STRICT;
2045 }
2046- if( h->param.b_intra_refresh && h->param.i_frame_reference > 1 )
2047+ if( h->param.b_intra_refresh && (h->param.i_frame_reference > 1 || h->param.i_dpb_size > 1) )
2048 {
2049 x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
2050 h->param.i_frame_reference = 1;
2051+ h->param.i_dpb_size = 1;
2052 }
2053 if( h->param.b_intra_refresh && h->param.i_open_gop )
2054 {
2055@@ -1481,6 +1483,8 @@ static inline void x264_reference_build_list( x264_t *h, int i_poc )
2056
2057 for( int i = 0; h->frames.reference[i]; i++ )
2058 {
2059+ if( !h->frames.reference[i]->b_valid )
2060+ continue;
2061 if( h->frames.reference[i]->i_poc < i_poc )
2062 h->fref0[h->i_ref0++] = h->frames.reference[i];
2063 else if( h->frames.reference[i]->i_poc > i_poc )
2064@@ -2185,6 +2189,23 @@ void x264_encoder_intra_refresh( x264_t *h )
2065 h->b_queued_intra_refresh = 1;
2066 }
2067
2068+int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
2069+{
2070+ if( h->param.i_bframe )
2071+ {
2072+ x264_log( h, X264_LOG_ERROR, "x264_encoder_invalidate_reference is not supported with B-frames enabled\n" );
2073+ return -1;
2074+ }
2075+ if( h->param.b_intra_refresh )
2076+ {
2077+ x264_log( h, X264_LOG_ERROR, "x264_encoder_invalidate_reference is not supported with intra refresh enabled\n" );
2078+ return -1;
2079+ }
2080+ h = h->thread[h->i_thread_phase];
2081+ h->i_reference_invalidate_pts = pts;
2082+ return 0;
2083+}
2084+
2085 /****************************************************************************
2086 * x264_encoder_encode:
2087 * XXX: i_poc : is the poc of the current given picture
2088@@ -2330,6 +2351,29 @@ int x264_encoder_encode( x264_t *h,
2089 h->fenc->param->param_free( h->fenc->param );
2090 }
2091
2092+ if( h->i_reference_invalidate_pts )
2093+ {
2094+ if( h->i_reference_invalidate_pts >= h->i_last_idr_pts )
2095+ for( int i = 0; h->frames.reference[i]; i++ )
2096+ if( h->i_reference_invalidate_pts <= h->frames.reference[i]->i_pts )
2097+ h->frames.reference[i]->b_valid = 0;
2098+ h->i_reference_invalidate_pts = 0;
2099+ }
2100+
2101+ if( !IS_X264_TYPE_I( h->fenc->i_type ) )
2102+ {
2103+ int valid_refs_left = 0;
2104+ for( int i = 0; h->frames.reference[i]; i++ )
2105+ if( h->frames.reference[i]->b_valid )
2106+ valid_refs_left++;
2107+ /* No valid reference frames left: force an IDR. */
2108+ if( !valid_refs_left )
2109+ {
2110+ h->fenc->b_keyframe = 1;
2111+ h->fenc->i_type = X264_TYPE_IDR;
2112+ }
2113+ }
2114+
2115 if( h->fenc->b_keyframe )
2116 {
2117 h->frames.i_last_keyframe = h->fenc->i_frame;
2118@@ -2393,7 +2437,30 @@ int x264_encoder_encode( x264_t *h,
2119 h->fenc->b_kept_as_ref =
2120 h->fdec->b_kept_as_ref = i_nal_ref_idc != NAL_PRIORITY_DISPOSABLE && h->param.i_keyint_max > 1;
2121
2122-
2123+ h->fdec->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
2124+ if( h->frames.i_bframe_delay )
2125+ {
2126+ int64_t *prev_reordered_pts = thread_current->frames.i_prev_reordered_pts;
2127+ if( h->i_frame <= h->frames.i_bframe_delay )
2128+ {
2129+ if( h->i_dts_compress_multiplier == 1 )
2130+ h->fdec->i_dts = h->fenc->i_reordered_pts - h->frames.i_bframe_delay_time;
2131+ else
2132+ {
2133+ /* DTS compression */
2134+ if( h->i_frame == 1 )
2135+ thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
2136+ h->fdec->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
2137+ }
2138+ }
2139+ else
2140+ h->fdec->i_dts = prev_reordered_pts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
2141+ prev_reordered_pts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
2142+ }
2143+ else
2144+ h->fdec->i_dts = h->fenc->i_reordered_pts;
2145+ if( h->fenc->i_type == X264_TYPE_IDR )
2146+ h->i_last_idr_pts = h->fdec->i_pts;
2147
2148 /* ------------------- Init ----------------------------- */
2149 /* build ref list 0/1 */
2150@@ -2616,28 +2683,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
2151
2152 pic_out->b_keyframe = h->fenc->b_keyframe;
2153
2154- pic_out->i_pts = h->fenc->i_pts *= h->i_dts_compress_multiplier;
2155- if( h->frames.i_bframe_delay )
2156- {
2157- int64_t *prev_reordered_pts = thread_current->frames.i_prev_reordered_pts;
2158- if( h->i_frame <= h->frames.i_bframe_delay )
2159- {
2160- if( h->i_dts_compress_multiplier == 1 )
2161- pic_out->i_dts = h->fenc->i_reordered_pts - h->frames.i_bframe_delay_time;
2162- else
2163- {
2164- /* DTS compression */
2165- if( h->i_frame == 1 )
2166- thread_current->frames.i_init_delta = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
2167- pic_out->i_dts = h->i_frame * thread_current->frames.i_init_delta / h->i_dts_compress_multiplier;
2168- }
2169- }
2170- else
2171- pic_out->i_dts = prev_reordered_pts[ (h->i_frame - h->frames.i_bframe_delay) % h->frames.i_bframe_delay ];
2172- prev_reordered_pts[ h->i_frame % h->frames.i_bframe_delay ] = h->fenc->i_reordered_pts * h->i_dts_compress_multiplier;
2173- }
2174- else
2175- pic_out->i_dts = h->fenc->i_reordered_pts;
2176+ pic_out->i_pts = h->fdec->i_pts;
2177+ pic_out->i_dts = h->fdec->i_dts;
2178+
2179 if( pic_out->i_pts < pic_out->i_dts )
2180 x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );
2181
2182diff --git a/encoder/set.c b/encoder/set.c
2183index 86b4a30..0ec3816 100644
2184--- a/encoder/set.c
2185+++ b/encoder/set.c
2186@@ -223,8 +223,8 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
2187 /* extra slot with pyramid so that we don't have to override the
2188 * order of forgetting old pictures */
2189 sps->vui.i_max_dec_frame_buffering =
2190- sps->i_num_ref_frames = X264_MIN(16, X264_MAX3(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
2191- param->i_bframe_pyramid ? 4 : 1 ));
2192+ sps->i_num_ref_frames = X264_MAX(param->i_dpb_size, X264_MIN(16, X264_MAX3(param->i_frame_reference,
2193+ 1 + sps->vui.i_num_reorder_frames, param->i_bframe_pyramid ? 4 : 1 )));
2194 sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
2195
2196 sps->vui.b_bitstream_restriction = 1;
2197diff --git a/x264.h b/x264.h
2198index 09183fd..21c8ca9 100644
2199--- a/x264.h
2200+++ b/x264.h
2201@@ -35,7 +35,7 @@
2202
2203 #include <stdarg.h>
2204
2205-#define X264_BUILD 99
2206+#define X264_BUILD 100
2207
2208 /* x264_t:
2209 * opaque handler for encoder */
2210@@ -217,6 +217,8 @@ typedef struct x264_param_t
2211
2212 /* Bitstream parameters */
2213 int i_frame_reference; /* Maximum number of reference frames */
2214+ int i_dpb_size; /* Force a DPB size larger than that implied by B-frames and reference frames.
2215+ * Useful in combination with interactive error resilience. */
2216 int i_keyint_max; /* Force an IDR keyframe at this interval */
2217 int i_keyint_min; /* Scenecuts closer together than this are coded as I, not IDR. */
2218 int i_scenecut_threshold; /* how aggressively to insert extra I frames */
2219@@ -682,9 +684,36 @@ int x264_encoder_delayed_frames( x264_t * );
2220 * If an intra refresh is not in progress, begin one with the next P-frame.
2221 * If an intra refresh is in progress, begin one as soon as the current one finishes.
2222 * Requires that b_intra_refresh be set.
2223+ *
2224 * Useful for interactive streaming where the client can tell the server that packet loss has
2225 * occurred. In this case, keyint can be set to an extremely high value so that intra refreshes
2226- * only occur when calling x264_encoder_intra_refresh. */
2227+ * only occur when calling x264_encoder_intra_refresh.
2228+ *
2229+ * In multi-pass encoding, if x264_encoder_intra_refresh is called differently in each pass,
2230+ * behavior is undefined.
2231+ *
2232+ * Should be called before the next x264_encoder_encode (i.e. not during an x264_encoder_encode). */
2233 void x264_encoder_intra_refresh( x264_t * );
2234+/* x264_encoder_invalidate_reference:
2235+ * An interactive error resilience tool, designed for use in a low-latency one-encoder-few-clients
2236+ * system. When the client has packet loss or otherwise incorrectly decodes a frame, the encoder
2237+ * can be told with this command to "forget" the frame and all frames that depend on it, referencing
2238+ * only frames that occurred before the loss. This will force a keyframe if no frames are left to
2239+ * reference after the aforementioned "forgetting".
2240+ *
2241+ * It is strongly recommended to use an i_dpb_size larger than i_frame_reference in this case, which
2242+ * allows the encoder to keep around extra, older frames to fall back on in case more recent frames
2243+ * are all invalidated. It is also recommended to set a very large keyframe interval, so that
2244+ * keyframes are not used except as necessary for error recovery.
2245+ *
2246+ * x264_encoder_invalidate_reference is not compatible with the use of B-frames or intra refresh.
2247+ *
2248+ * In multi-pass encoding, if x264_encoder_invalidate_reference is called differently in each pass,
2249+ * behavior is undefined.
2250+ *
2251+ * Should be called before the next x264_encoder_encode (i.e. not during an x264_encoder_encode).
2252+ *
2253+ * Returns 0 on success, negative on failure. */
2254+int x264_encoder_invalidate_reference( x264_t *, int64_t pts );
2255
2256 #endif
2257--
22581.7.0.4