· 8 years ago · Jun 01, 2017, 08:28 AM
1From 4fa7d539c1a09a0779194c7971495e966ff7de1e Mon Sep 17 00:00:00 2001
2From: Jason Garrett-Glaser <darkshikari@gmail.com>
3Date: Tue, 15 Jun 2010 05:15:42 -0700
4Subject: [PATCH 1/9] Fix compilation on ARM w/ Apple ABI
5
6---
7 encoder/me.c | 2 +-
8 1 files changed, 1 insertions(+), 1 deletions(-)
9
10diff --git a/encoder/me.c b/encoder/me.c
11index 2914eb3..291104a 100644
12--- a/encoder/me.c
13+++ b/encoder/me.c
14@@ -245,7 +245,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
15 pmv = pack16to32_mask( bmx, bmy );
16 if( i_mvc > 0 )
17 {
18- ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16][2] );
19+ ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
20 x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
21 bcost <<= 4;
22 for( int i = 1; i <= i_mvc; i++ )
23--
241.7.0.4
25
26
27From 3f539defdc78eb77c90e0164e62a851a4bb42669 Mon Sep 17 00:00:00 2001
28From: Steven Walters <kemuri9@gmail.com>
29Date: Wed, 9 Jun 2010 18:14:52 -0400
30Subject: [PATCH 2/9] Use threadpools to avoid unnecessary thread creation
31 Tiny performance improvement with fast settings and lots of threads.
32 May help more on some OSs with slow thread creation, like OS X.
33 Unify inconsistent synchronized abbreviations to sync.
34
35---
36 Makefile | 3 +-
37 common/common.h | 10 ++-
38 common/frame.c | 19 +++++-
39 common/frame.h | 9 ++-
40 common/threadpool.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++
41 common/threadpool.h | 39 ++++++++++++
42 encoder/encoder.c | 79 ++++++++++++-------------
43 encoder/lookahead.c | 22 ++++----
44 input/thread.c | 17 ++---
45 9 files changed, 288 insertions(+), 73 deletions(-)
46 create mode 100644 common/threadpool.c
47 create mode 100644 common/threadpool.h
48
49diff --git a/Makefile b/Makefile
50index 8074ce5..9837821 100644
51--- a/Makefile
52+++ b/Makefile
53@@ -22,13 +22,14 @@ SRCSO =
54
55 CONFIG := $(shell cat config.h)
56
57-# Optional muxer module sources
58+# Optional module sources
59 ifneq ($(findstring HAVE_AVS, $(CONFIG)),)
60 SRCCLI += input/avs.c
61 endif
62
63 ifneq ($(findstring HAVE_PTHREAD, $(CONFIG)),)
64 SRCCLI += input/thread.c
65+SRCS += common/threadpool.c
66 endif
67
68 ifneq ($(findstring HAVE_LAVF, $(CONFIG)),)
69diff --git a/common/common.h b/common/common.h
70index abb5db2..659c2a4 100644
71--- a/common/common.h
72+++ b/common/common.h
73@@ -160,6 +160,7 @@ static const int x264_scan8[16+2*4+3] =
74 #include "cabac.h"
75 #include "quant.h"
76 #include "cpu.h"
77+#include "threadpool.h"
78
79 /****************************************************************************
80 * General functions
81@@ -364,9 +365,10 @@ typedef struct x264_lookahead_t
82 int i_last_keyframe;
83 int i_slicetype_length;
84 x264_frame_t *last_nonb;
85- x264_synch_frame_list_t ifbuf;
86- x264_synch_frame_list_t next;
87- x264_synch_frame_list_t ofbuf;
88+ x264_pthread_t thread_handle;
89+ x264_sync_frame_list_t ifbuf;
90+ x264_sync_frame_list_t next;
91+ x264_sync_frame_list_t ofbuf;
92 } x264_lookahead_t;
93
94 typedef struct x264_ratecontrol_t x264_ratecontrol_t;
95@@ -377,11 +379,11 @@ struct x264_t
96 x264_param_t param;
97
98 x264_t *thread[X264_THREAD_MAX+1];
99- x264_pthread_t thread_handle;
100 int b_thread_active;
101 int i_thread_phase; /* which thread to use for the next frame */
102 int i_threadslice_start; /* first row in this thread slice */
103 int i_threadslice_end; /* row after the end of this thread slice */
104+ x264_threadpool_t *threadpool;
105
106 /* bitstream output */
107 struct
108diff --git a/common/frame.c b/common/frame.c
109index c5c573f..7c2fce0 100644
110--- a/common/frame.c
111+++ b/common/frame.c
112@@ -517,7 +517,7 @@ void x264_frame_delete_list( x264_frame_t **list )
113 x264_free( list );
114 }
115
116-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int max_size )
117+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int max_size )
118 {
119 if( max_size < 0 )
120 return -1;
121@@ -533,7 +533,7 @@ fail:
122 return -1;
123 }
124
125-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
126+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist )
127 {
128 x264_pthread_mutex_destroy( &slist->mutex );
129 x264_pthread_cond_destroy( &slist->cv_fill );
130@@ -541,7 +541,7 @@ void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist )
131 x264_frame_delete_list( slist->list );
132 }
133
134-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame )
135+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame )
136 {
137 x264_pthread_mutex_lock( &slist->mutex );
138 while( slist->i_size == slist->i_max_size )
139@@ -550,3 +550,16 @@ void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *f
140 x264_pthread_mutex_unlock( &slist->mutex );
141 x264_pthread_cond_broadcast( &slist->cv_fill );
142 }
143+/* Remove and return the entry at the highest occupied index of slist, blocking while it is empty. */
144+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist )
145+{
146+ x264_frame_t *frame;
147+ x264_pthread_mutex_lock( &slist->mutex );
148+ while( !slist->i_size ) /* re-check the size after every wakeup */
149+ x264_pthread_cond_wait( &slist->cv_fill, &slist->mutex );
150+ frame = slist->list[ --slist->i_size ];
151+ slist->list[ slist->i_size ] = NULL; /* clear the vacated slot */
152+ x264_pthread_cond_broadcast( &slist->cv_empty ); /* signal that the list became emptier */
153+ x264_pthread_mutex_unlock( &slist->mutex );
154+ return frame;
155+}
156diff --git a/common/frame.h b/common/frame.h
157index 7d252c3..26529ce 100644
158--- a/common/frame.h
159+++ b/common/frame.h
160@@ -154,7 +154,7 @@ typedef struct
161 x264_pthread_mutex_t mutex;
162 x264_pthread_cond_t cv_fill; /* event signaling that the list became fuller */
163 x264_pthread_cond_t cv_empty; /* event signaling that the list became emptier */
164-} x264_synch_frame_list_t;
165+} x264_sync_frame_list_t;
166
167 typedef void (*x264_deblock_inter_t)( pixel *pix, int stride, int alpha, int beta, int8_t *tc0 );
168 typedef void (*x264_deblock_intra_t)( pixel *pix, int stride, int alpha, int beta );
169@@ -202,9 +202,10 @@ x264_frame_t *x264_frame_pop_unused( x264_t *h, int b_fdec );
170 void x264_frame_sort( x264_frame_t **list, int b_dts );
171 void x264_frame_delete_list( x264_frame_t **list );
172
173-int x264_synch_frame_list_init( x264_synch_frame_list_t *slist, int nelem );
174-void x264_synch_frame_list_delete( x264_synch_frame_list_t *slist );
175-void x264_synch_frame_list_push( x264_synch_frame_list_t *slist, x264_frame_t *frame );
176+int x264_sync_frame_list_init( x264_sync_frame_list_t *slist, int nelem );
177+void x264_sync_frame_list_delete( x264_sync_frame_list_t *slist );
178+void x264_sync_frame_list_push( x264_sync_frame_list_t *slist, x264_frame_t *frame );
179+x264_frame_t *x264_sync_frame_list_pop( x264_sync_frame_list_t *slist );
180
181 #define x264_frame_sort_dts(list) x264_frame_sort(list, 1)
182 #define x264_frame_sort_pts(list) x264_frame_sort(list, 0)
183diff --git a/common/threadpool.c b/common/threadpool.c
184new file mode 100644
185index 0000000..4448ea2
186--- /dev/null
187+++ b/common/threadpool.c
188@@ -0,0 +1,163 @@
189+/*****************************************************************************
190+ * threadpool.c: x264 threadpool module
191+ *****************************************************************************
192+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
193+ *
194+ * This program is free software; you can redistribute it and/or modify
195+ * it under the terms of the GNU General Public License as published by
196+ * the Free Software Foundation; either version 2 of the License, or
197+ * (at your option) any later version.
198+ *
199+ * This program is distributed in the hope that it will be useful,
200+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
201+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
202+ * GNU General Public License for more details.
203+ *
204+ * You should have received a copy of the GNU General Public License
205+ * along with this program; if not, write to the Free Software
206+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
207+ *****************************************************************************/
208+
209+#include "common.h"
210+/* One unit of work handed to the pool. */
211+typedef struct
212+{
213+ void *(*func)(void *); /* function a worker thread calls */
214+ void *arg; /* argument passed to func; x264_threadpool_wait also identifies the job by this pointer */
215+ void *ret; /* value returned by func, stored by the worker */
216+} x264_threadpool_job_t;
217+/* State shared by all worker threads of one pool. */
218+struct x264_threadpool_t
219+{
220+ int exit; /* set to 1 by x264_threadpool_delete to make the workers leave their loop */
221+ int threads; /* number of worker threads, clamped to X264_THREAD_MAX by x264_threadpool_init */
222+ x264_pthread_t *thread_handle; /* array of `threads` handles, joined on delete */
223+ void (*init_func)(void *); /* optional hook each worker calls once before taking jobs */
224+ void *init_arg; /* argument passed to init_func */
225+
226+ /* requires a synchronized list structure and associated methods,
227+ so use what is already implemented for frames */
228+ x264_sync_frame_list_t uninit; /* list of jobs that are awaiting use */
229+ x264_sync_frame_list_t run; /* list of jobs that are queued for processing by the pool */
230+ x264_sync_frame_list_t done; /* list of jobs that have finished processing */
231+};
232+/* Worker thread body: run the optional init hook, then execute queued jobs until pool->exit is set. */
233+static void *x264_threadpool_thread( x264_threadpool_t *pool )
234+{
235+ if( pool->init_func )
236+ pool->init_func( pool->init_arg );
237+ while( !pool->exit )
238+ {
239+ x264_threadpool_job_t *job = NULL;
240+ x264_pthread_mutex_lock( &pool->run.mutex );
241+ while( !pool->exit && !pool->run.i_size ) /* sleep until work is queued or shutdown is requested */
242+ x264_pthread_cond_wait( &pool->run.cv_fill, &pool->run.mutex );
243+ if( pool->run.i_size )
244+ {
245+ job = (void*)x264_frame_shift( pool->run.list ); /* dequeue one job while holding the lock */
246+ pool->run.i_size--;
247+ }
248+ x264_pthread_mutex_unlock( &pool->run.mutex );
249+ if( !job ) /* woken with nothing queued (shutdown): re-test pool->exit */
250+ continue;
251+ job->ret = job->func( job->arg ); /* execute the function */
252+ x264_sync_frame_list_push( &pool->done, (void*)job ); /* publish the result for x264_threadpool_wait */
253+ }
254+ return NULL; /* a pthread start routine must return void *; the value is unused (joined with NULL) */
255+}
256+/* Create a pool of worker threads and store it in *p_pool. Returns 0 on success, -1 on failure. */
257+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
258+ void (*init_func)(void *), void *init_arg )
259+{
260+ if( threads <= 0 )
261+ return -1;
262+
263+ x264_threadpool_t *pool;
264+ CHECKED_MALLOCZERO( pool, sizeof(x264_threadpool_t) );
265+ *p_pool = pool; /* stored before the steps below, so the caller holds the pointer even if they fail */
266+
267+ pool->init_func = init_func; /* optional hook each worker calls once before taking jobs */
268+ pool->init_arg = init_arg;
269+ pool->threads = X264_MIN( threads, X264_THREAD_MAX );
270+
271+ CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
272+
273+ if( x264_sync_frame_list_init( &pool->uninit, pool->threads ) ||
274+ x264_sync_frame_list_init( &pool->run, pool->threads ) ||
275+ x264_sync_frame_list_init( &pool->done, pool->threads ) )
276+ goto fail;
277+
278+ for( int i = 0; i < pool->threads; i++ ) /* pre-allocate one job record per worker */
279+ {
280+ x264_threadpool_job_t *job;
281+ CHECKED_MALLOC( job, sizeof(x264_threadpool_job_t) );
282+ x264_sync_frame_list_push( &pool->uninit, (void*)job );
283+ }
284+ for( int i = 0; i < pool->threads; i++ )
285+ if( x264_pthread_create( pool->thread_handle+i, NULL, (void*)x264_threadpool_thread, pool ) )
286+ goto fail;
287+
288+ return 0;
289+fail: /* NOTE(review): nothing allocated or started above is released here -- confirm the caller cleans up */
290+ return -1;
291+}
292+/* Queue func(arg) for execution by a worker; the result is collected with x264_threadpool_wait( pool, arg ). */
293+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg )
294+{
295+ x264_threadpool_job_t *job = (void*)x264_sync_frame_list_pop( &pool->uninit ); /* blocks while no job record is free */
296+ job->func = func;
297+ job->arg = arg;
298+ x264_sync_frame_list_push( &pool->run, (void*)job );
299+}
300+/* Block until a finished job that was submitted with `arg` is available, then return func's result. */
301+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg )
302+{
303+ x264_threadpool_job_t *job = NULL;
304+
305+ x264_pthread_mutex_lock( &pool->done.mutex );
306+ while( !job )
307+ {
308+ for( int i = 0; !job && i < pool->done.i_size; i++ ) /* stop at the first match so job is never overwritten */
309+ {
310+ x264_threadpool_job_t *t = (void*)pool->done.list[i];
311+ if( t->arg == arg )
312+ {
313+ job = (void*)x264_frame_shift( pool->done.list+i );
314+ pool->done.i_size--;
315+ }
316+ }
317+ if( !job ) /* not finished yet: sleep until a worker pushes onto the done list */
318+ x264_pthread_cond_wait( &pool->done.cv_fill, &pool->done.mutex );
319+ }
320+ x264_pthread_mutex_unlock( &pool->done.mutex );
321+
322+ void *ret = job->ret;
323+ x264_sync_frame_list_push( &pool->uninit, (void*)job ); /* hand the job record back for reuse by x264_threadpool_run */
324+ return ret;
325+}
326+/* Free every job record still stored in slist, then destroy the list itself. */
327+static void x264_threadpool_list_delete( x264_sync_frame_list_t *slist )
328+{
329+ for( int i = 0; slist->list[i]; i++ ) /* walks until the first NULL slot; assumes the array is NULL-terminated -- TODO confirm */
330+ {
331+ x264_free( slist->list[i] );
332+ slist->list[i] = NULL; /* entries are job records, so clear them before the frame-list teardown below */
333+ }
334+ x264_sync_frame_list_delete( slist );
335+}
336+/* Stop and join all worker threads, then free the job lists and the pool. */
337+void x264_threadpool_delete( x264_threadpool_t *pool )
338+{
339+ x264_pthread_mutex_lock( &pool->run.mutex );
340+ pool->exit = 1;
341+ x264_pthread_cond_broadcast( &pool->run.cv_fill ); /* wake workers blocked waiting for a job so they see exit */
342+ x264_pthread_mutex_unlock( &pool->run.mutex );
343+ for( int i = 0; i < pool->threads; i++ )
344+ x264_pthread_join( pool->thread_handle[i], NULL );
345+
346+ x264_threadpool_list_delete( &pool->uninit );
347+ x264_threadpool_list_delete( &pool->run );
348+ x264_threadpool_list_delete( &pool->done );
349+ x264_free( pool->thread_handle );
350+ x264_free( pool );
351+}
352diff --git a/common/threadpool.h b/common/threadpool.h
353new file mode 100644
354index 0000000..519737c
355--- /dev/null
356+++ b/common/threadpool.h
357@@ -0,0 +1,39 @@
358+/*****************************************************************************
359+ * threadpool.h: x264 threadpool module
360+ *****************************************************************************
361+ * Copyright (C) 2010 Steven Walters <kemuri9@gmail.com>
362+ *
363+ * This program is free software; you can redistribute it and/or modify
364+ * it under the terms of the GNU General Public License as published by
365+ * the Free Software Foundation; either version 2 of the License, or
366+ * (at your option) any later version.
367+ *
368+ * This program is distributed in the hope that it will be useful,
369+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
370+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
371+ * GNU General Public License for more details.
372+ *
373+ * You should have received a copy of the GNU General Public License
374+ * along with this program; if not, write to the Free Software
375+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
376+ *****************************************************************************/
377+
378+#ifndef X264_THREADPOOL_H
379+#define X264_THREADPOOL_H
380+
381+typedef struct x264_threadpool_t x264_threadpool_t;
382+
383+#if HAVE_PTHREAD
384+int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
385+ void (*init_func)(void *), void *init_arg ); /* returns 0 on success, -1 on failure */
386+void x264_threadpool_run( x264_threadpool_t *pool, void *(*func)(void *), void *arg ); /* queue func(arg) */
387+void *x264_threadpool_wait( x264_threadpool_t *pool, void *arg ); /* result of the job queued with arg */
388+void x264_threadpool_delete( x264_threadpool_t *pool ); /* join the workers and free the pool */
389+#else /* built without pthreads: stub macros, init evaluates to -1 (failure) */
390+#define x264_threadpool_init(p,t,f,a) -1
391+#define x264_threadpool_run(p,f,a)
392+#define x264_threadpool_wait(p,a) NULL
393+#define x264_threadpool_delete(p)
394+#endif
395+
396+#endif
397diff --git a/encoder/encoder.c b/encoder/encoder.c
398index 08a28bd..0d33915 100644
399--- a/encoder/encoder.c
400+++ b/encoder/encoder.c
401@@ -349,6 +349,20 @@ fail:
402 return -1;
403 }
404
405+#if HAVE_PTHREAD
406+static void x264_encoder_thread_init( x264_t *h ) /* passed to x264_threadpool_init as the per-thread init hook */
407+{
408+ if( h->param.i_sync_lookahead )
409+ x264_lower_thread_priority( 10 );
410+
411+#if HAVE_MMX
412+ /* Misalign mask has to be set separately for each thread. */
413+ if( h->param.cpu&X264_CPU_SSE_MISALIGN )
414+ x264_cpu_mask_misalign_sse();
415+#endif
416+}
417+#endif
418+
419 /****************************************************************************
420 *
421 ****************************************************************************
422@@ -1052,6 +1066,10 @@ x264_t *x264_encoder_open( x264_param_t *param )
423 CHECKED_MALLOC( h->nal_buffer, h->out.i_bitstream * 3/2 + 4 );
424 h->nal_buffer_size = h->out.i_bitstream * 3/2 + 4;
425
426+ if( h->param.i_threads > 1 &&
427+ x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
428+ goto fail;
429+
430 h->thread[0] = h;
431 for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
432 CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
433@@ -2044,14 +2062,6 @@ static void *x264_slices_write( x264_t *h )
434 {
435 int i_slice_num = 0;
436 int last_thread_mb = h->sh.i_last_mb;
437- if( h->param.i_sync_lookahead )
438- x264_lower_thread_priority( 10 );
439-
440-#if HAVE_MMX
441- /* Misalign mask has to be set separately for each thread. */
442- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
443- x264_cpu_mask_misalign_sse();
444-#endif
445
446 #if HAVE_VISUALIZE
447 if( h->param.b_visualize )
448@@ -2093,11 +2103,6 @@ static void *x264_slices_write( x264_t *h )
449
450 static int x264_threaded_slices_write( x264_t *h )
451 {
452- void *ret = NULL;
453-#if HAVE_MMX
454- if( h->param.cpu&X264_CPU_SSE_MISALIGN )
455- x264_cpu_mask_misalign_sse();
456-#endif
457 /* set first/last mb and sync contexts */
458 for( int i = 0; i < h->param.i_threads; i++ )
459 {
460@@ -2121,16 +2126,14 @@ static int x264_threaded_slices_write( x264_t *h )
461 /* dispatch */
462 for( int i = 0; i < h->param.i_threads; i++ )
463 {
464- if( x264_pthread_create( &h->thread[i]->thread_handle, NULL, (void*)x264_slices_write, (void*)h->thread[i] ) )
465- return -1;
466+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h->thread[i] );
467 h->thread[i]->b_thread_active = 1;
468 }
469 for( int i = 0; i < h->param.i_threads; i++ )
470 {
471- x264_pthread_join( h->thread[i]->thread_handle, &ret );
472 h->thread[i]->b_thread_active = 0;
473- if( (intptr_t)ret )
474- return (intptr_t)ret;
475+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h->thread[i] ) )
476+ return -1;
477 }
478
479 /* Go back and fix up the hpel on the borders between slices. */
480@@ -2206,6 +2209,10 @@ int x264_encoder_encode( x264_t *h,
481 thread_current =
482 thread_oldest = h;
483 }
484+#if HAVE_MMX
485+ if( h->i_thread_frames == 1 && h->param.cpu&X264_CPU_SSE_MISALIGN )
486+ x264_cpu_mask_misalign_sse();
487+#endif
488
489 // ok to call this before encoding any frames, since the initial values of fdec have b_kept_as_ref=0
490 if( x264_reference_update( h ) )
491@@ -2529,8 +2536,7 @@ int x264_encoder_encode( x264_t *h,
492 h->i_threadslice_end = h->mb.i_mb_height;
493 if( h->i_thread_frames > 1 )
494 {
495- if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
496- return -1;
497+ x264_threadpool_run( h->threadpool, (void*)x264_slices_write, h );
498 h->b_thread_active = 1;
499 }
500 else if( h->param.b_sliced_threads )
501@@ -2553,11 +2559,9 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
502
503 if( h->b_thread_active )
504 {
505- void *ret = NULL;
506- x264_pthread_join( h->thread_handle, &ret );
507 h->b_thread_active = 0;
508- if( (intptr_t)ret )
509- return (intptr_t)ret;
510+ if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
511+ return -1;
512 }
513 if( !h->out.i_nal )
514 {
515@@ -2822,25 +2826,20 @@ void x264_encoder_close ( x264_t *h )
516 x264_lookahead_delete( h );
517
518 if( h->param.i_threads > 1 )
519+ x264_threadpool_delete( h->threadpool );
520+ if( h->i_thread_frames > 1 )
521 {
522- // don't strictly have to wait for the other threads, but it's simpler than canceling them
523- for( int i = 0; i < h->param.i_threads; i++ )
524+ for( int i = 0; i < h->i_thread_frames; i++ )
525 if( h->thread[i]->b_thread_active )
526- x264_pthread_join( h->thread[i]->thread_handle, NULL );
527- if( h->i_thread_frames > 1 )
528- {
529- for( int i = 0; i < h->i_thread_frames; i++ )
530- if( h->thread[i]->b_thread_active )
531- {
532- assert( h->thread[i]->fenc->i_reference_count == 1 );
533- x264_frame_delete( h->thread[i]->fenc );
534- }
535+ {
536+ assert( h->thread[i]->fenc->i_reference_count == 1 );
537+ x264_frame_delete( h->thread[i]->fenc );
538+ }
539
540- x264_t *thread_prev = h->thread[h->i_thread_phase];
541- x264_thread_sync_ratecontrol( h, thread_prev, h );
542- x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
543- h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
544- }
545+ x264_t *thread_prev = h->thread[h->i_thread_phase];
546+ x264_thread_sync_ratecontrol( h, thread_prev, h );
547+ x264_thread_sync_ratecontrol( thread_prev, thread_prev, h );
548+ h->i_frame = thread_prev->i_frame + 1 - h->i_thread_frames;
549 }
550 h->i_frame++;
551
552diff --git a/encoder/lookahead.c b/encoder/lookahead.c
553index a79d4b1..f0af216 100644
554--- a/encoder/lookahead.c
555+++ b/encoder/lookahead.c
556@@ -37,7 +37,7 @@
557 #include "common/common.h"
558 #include "analyse.h"
559
560-static void x264_lookahead_shift( x264_synch_frame_list_t *dst, x264_synch_frame_list_t *src, int count )
561+static void x264_lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
562 {
563 int i = count;
564 while( i-- )
565@@ -137,9 +137,9 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
566 look->i_slicetype_length = i_slicetype_length;
567
568 /* init frame lists */
569- if( x264_synch_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
570- x264_synch_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
571- x264_synch_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
572+ if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) ||
573+ x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) ||
574+ x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
575 goto fail;
576
577 if( !h->param.i_sync_lookahead )
578@@ -153,7 +153,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
579 if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
580 goto fail;
581
582- if( x264_pthread_create( &look_h->thread_handle, NULL, (void *)x264_lookahead_thread, look_h ) )
583+ if( x264_pthread_create( &look->thread_handle, NULL, (void*)x264_lookahead_thread, look_h ) )
584 goto fail;
585 look->b_thread_active = 1;
586
587@@ -171,25 +171,25 @@ void x264_lookahead_delete( x264_t *h )
588 h->lookahead->b_exit_thread = 1;
589 x264_pthread_cond_broadcast( &h->lookahead->ifbuf.cv_fill );
590 x264_pthread_mutex_unlock( &h->lookahead->ifbuf.mutex );
591- x264_pthread_join( h->thread[h->param.i_threads]->thread_handle, NULL );
592+ x264_pthread_join( h->lookahead->thread_handle, NULL );
593 x264_macroblock_cache_free( h->thread[h->param.i_threads] );
594 x264_macroblock_thread_free( h->thread[h->param.i_threads], 1 );
595 x264_free( h->thread[h->param.i_threads] );
596 }
597- x264_synch_frame_list_delete( &h->lookahead->ifbuf );
598- x264_synch_frame_list_delete( &h->lookahead->next );
599+ x264_sync_frame_list_delete( &h->lookahead->ifbuf );
600+ x264_sync_frame_list_delete( &h->lookahead->next );
601 if( h->lookahead->last_nonb )
602 x264_frame_push_unused( h, h->lookahead->last_nonb );
603- x264_synch_frame_list_delete( &h->lookahead->ofbuf );
604+ x264_sync_frame_list_delete( &h->lookahead->ofbuf );
605 x264_free( h->lookahead );
606 }
607
608 void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
609 {
610 if( h->param.i_sync_lookahead )
611- x264_synch_frame_list_push( &h->lookahead->ifbuf, frame );
612+ x264_sync_frame_list_push( &h->lookahead->ifbuf, frame );
613 else
614- x264_synch_frame_list_push( &h->lookahead->next, frame );
615+ x264_sync_frame_list_push( &h->lookahead->next, frame );
616 }
617
618 int x264_lookahead_is_empty( x264_t *h )
619diff --git a/input/thread.c b/input/thread.c
620index a88cfae..c4b07fa 100644
621--- a/input/thread.c
622+++ b/input/thread.c
623@@ -30,10 +30,9 @@ typedef struct
624 cli_input_t input;
625 hnd_t p_handle;
626 x264_picture_t pic;
627- x264_pthread_t tid;
628+ x264_threadpool_t *pool;
629 int next_frame;
630 int frame_total;
631- int in_progress;
632 struct thread_input_arg_t *next_args;
633 } thread_hnd_t;
634
635@@ -55,7 +54,6 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
636 }
637 h->input = input;
638 h->p_handle = *p_handle;
639- h->in_progress = 0;
640 h->next_frame = -1;
641 h->next_args = malloc( sizeof(thread_input_arg_t) );
642 if( !h->next_args )
643@@ -66,6 +64,9 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
644 thread_input.picture_alloc = h->input.picture_alloc;
645 thread_input.picture_clean = h->input.picture_clean;
646
647+ if( x264_threadpool_init( &h->pool, 1, NULL, NULL ) )
648+ return -1;
649+
650 *p_handle = h;
651 return 0;
652 }
653@@ -88,9 +89,8 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
654
655 if( h->next_frame >= 0 )
656 {
657- x264_pthread_join( h->tid, NULL );
658+ x264_threadpool_wait( h->pool, h->next_args );
659 ret |= h->next_args->status;
660- h->in_progress = 0;
661 }
662
663 if( h->next_frame == i_frame )
664@@ -103,9 +103,7 @@ static int read_frame( x264_picture_t *p_pic, hnd_t handle, int i_frame )
665 h->next_frame =
666 h->next_args->i_frame = i_frame+1;
667 h->next_args->pic = &h->pic;
668- if( x264_pthread_create( &h->tid, NULL, (void*)read_frame_thread_int, h->next_args ) )
669- return -1;
670- h->in_progress = 1;
671+ x264_threadpool_run( h->pool, (void*)read_frame_thread_int, h->next_args );
672 }
673 else
674 h->next_frame = -1;
675@@ -124,8 +122,7 @@ static int release_frame( x264_picture_t *pic, hnd_t handle )
676 static int close_file( hnd_t handle )
677 {
678 thread_hnd_t *h = handle;
679- if( h->in_progress )
680- x264_pthread_join( h->tid, NULL );
681+ x264_threadpool_delete( h->pool );
682 h->input.close_file( h->p_handle );
683 h->input.picture_clean( &h->pic );
684 free( h->next_args );
685--
6861.7.0.4
687
688
689From 0496fd76623fb8dd72eefd4b20719f27565913c3 Mon Sep 17 00:00:00 2001
690From: Lamont Alston <wewk584@gmail.com>
691Date: Wed, 16 Jun 2010 10:05:17 -0700
692Subject: [PATCH 3/9] Add open-GOP support
693
694---
695 common/common.c | 6 +++-
696 common/common.h | 8 +++++-
697 encoder/encoder.c | 48 +++++++++++++++++++++++++-------------
698 encoder/lookahead.c | 2 +-
699 encoder/ratecontrol.c | 1 +
700 encoder/slicetype.c | 61 ++++++++++++++++++++++++++++++++++--------------
701 x264.c | 7 +++++-
702 x264.h | 4 ++-
703 8 files changed, 95 insertions(+), 42 deletions(-)
704
705diff --git a/common/common.c b/common/common.c
706index 4fa5e4b..5ccd541 100644
707--- a/common/common.c
708+++ b/common/common.c
709@@ -699,6 +699,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
710 p->i_slice_max_mbs = atoi(value);
711 OPT("slices")
712 p->i_slice_count = atoi(value);
713+ OPT("open-gop")
714+ p->b_open_gop = atobool(value);
715 OPT("cabac")
716 p->b_cabac = atobool(value);
717 OPT("cabac-idc")
718@@ -1186,9 +1188,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
719 s += sprintf( s, " bframes=%d", p->i_bframe );
720 if( p->i_bframe )
721 {
722- s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d",
723+ s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d weightb=%d open_gop=%d",
724 p->i_bframe_pyramid, p->i_bframe_adaptive, p->i_bframe_bias,
725- p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred );
726+ p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, p->b_open_gop );
727 }
728 s += sprintf( s, " weightp=%d", p->analyse.i_weighted_pred > 0 ? p->analyse.i_weighted_pred : 0 );
729
730diff --git a/common/common.h b/common/common.h
731index 659c2a4..19e5d32 100644
732--- a/common/common.h
733+++ b/common/common.h
734@@ -362,7 +362,7 @@ typedef struct x264_lookahead_t
735 volatile uint8_t b_exit_thread;
736 uint8_t b_thread_active;
737 uint8_t b_analyse_keyframe;
738- int i_last_keyframe;
739+ int i_last_coded_keyframe;
740 int i_slicetype_length;
741 x264_frame_t *last_nonb;
742 x264_pthread_t thread_handle;
743@@ -470,7 +470,11 @@ struct x264_t
744 /* frames used for reference + sentinels */
745 x264_frame_t *reference[16+2];
746
747- int i_last_keyframe; /* Frame number of the last keyframe */
748+ int i_last_coded_keyframe; /* Frame number of the last keyframe in coding order */
749+ int i_last_idr; /* Frame number of the last IDR (not RP)*/
750+ int i_poc_last_open_gop; /* Poc of the I frame of the last open-gop. The value
751+ * is only assigned during the period between that
752+ * I frame and the next P or I frame, else -1 */
753
754 int i_input; /* Number of input frames already accepted */
755
756diff --git a/encoder/encoder.c b/encoder/encoder.c
757index 0d33915..3e7f227 100644
758--- a/encoder/encoder.c
759+++ b/encoder/encoder.c
760@@ -573,12 +573,9 @@ static int x264_validate_parameters( x264_t *h )
761 x264_log( h, X264_LOG_WARNING, "subme=0 + direct=temporal is not supported\n" );
762 h->param.analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
763 }
764- h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_BFRAME_MAX );
765+ h->param.i_bframe = x264_clip3( h->param.i_bframe, 0, X264_MIN( X264_BFRAME_MAX, h->param.i_keyint_max-1 ) );
766 if( h->param.i_keyint_max == 1 )
767- {
768- h->param.i_bframe = 0;
769 h->param.b_intra_refresh = 0;
770- }
771 h->param.i_bframe_bias = x264_clip3( h->param.i_bframe_bias, -90, 100 );
772 if( h->param.i_bframe <= 1 )
773 h->param.i_bframe_pyramid = X264_B_PYRAMID_NONE;
774@@ -588,6 +585,7 @@ static int x264_validate_parameters( x264_t *h )
775 h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
776 h->param.analyse.i_direct_mv_pred = 0;
777 h->param.analyse.b_weighted_bipred = 0;
778+ h->param.b_open_gop = 0;
779 }
780 if( h->param.b_intra_refresh && h->param.i_bframe_pyramid == X264_B_PYRAMID_NORMAL )
781 {
782@@ -599,6 +597,11 @@ static int x264_validate_parameters( x264_t *h )
783 x264_log( h, X264_LOG_WARNING, "ref > 1 + intra-refresh is not supported\n" );
784 h->param.i_frame_reference = 1;
785 }
786+ if( h->param.b_intra_refresh && h->param.b_open_gop )
787+ {
788+ x264_log( h, X264_LOG_WARNING, "intra-refresh is not compatible with open-gop\n" );
789+ h->param.b_open_gop = 0;
790+ }
791 if( h->param.i_keyint_min == X264_KEYINT_MIN_AUTO )
792 h->param.i_keyint_min = h->param.i_keyint_max / 10;
793 h->param.i_keyint_min = x264_clip3( h->param.i_keyint_min, 1, h->param.i_keyint_max/2+1 );
794@@ -978,9 +981,11 @@ x264_t *x264_encoder_open( x264_param_t *param )
795 h->frames.b_have_lowres |= h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0;
796 h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
797
798- h->frames.i_last_keyframe = - h->param.i_keyint_max;
799+ h->frames.i_last_idr =
800+ h->frames.i_last_coded_keyframe = - h->param.i_keyint_max;
801 h->frames.i_input = 0;
802 h->frames.i_largest_pts = h->frames.i_second_largest_pts = -1;
803+ h->frames.i_poc_last_open_gop = -1;
804
805 CHECKED_MALLOCZERO( h->frames.unused[0], (h->frames.i_delay + 3) * sizeof(x264_frame_t *) );
806 /* Allocate room for max refs plus a few extra just in case. */
807@@ -1688,35 +1693,37 @@ static inline void x264_reference_hierarchy_reset( x264_t *h )
808 {
809 int ref;
810 int b_hasdelayframe = 0;
811- if( !h->param.i_bframe_pyramid )
812- return;
813
814 /* look for delay frames -- chain must only contain frames that are disposable */
815 for( int i = 0; h->frames.current[i] && IS_DISPOSABLE( h->frames.current[i]->i_type ); i++ )
816 b_hasdelayframe |= h->frames.current[i]->i_coded
817 != h->frames.current[i]->i_frame + h->sps->vui.i_num_reorder_frames;
818
819- if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe )
820+ /* This function must handle b-pyramid and clear frames for open-gop */
821+ if( h->param.i_bframe_pyramid != X264_B_PYRAMID_STRICT && !b_hasdelayframe && h->frames.i_poc_last_open_gop == -1 )
822 return;
823
824 /* Remove last BREF. There will never be old BREFs in the
825 * dpb during a BREF decode when pyramid == STRICT */
826 for( ref = 0; h->frames.reference[ref]; ref++ )
827 {
828- if( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
829+ if( ( h->param.i_bframe_pyramid == X264_B_PYRAMID_STRICT
830 && h->frames.reference[ref]->i_type == X264_TYPE_BREF )
831+ || ( h->frames.reference[ref]->i_poc < h->frames.i_poc_last_open_gop
832+ && h->sh.i_type != SLICE_TYPE_B ) )
833 {
834 int diff = h->i_frame_num - h->frames.reference[ref]->i_frame_num;
835 h->sh.mmco[h->sh.i_mmco_command_count].i_difference_of_pic_nums = diff;
836 h->sh.mmco[h->sh.i_mmco_command_count++].i_poc = h->frames.reference[ref]->i_poc;
837- x264_frame_push_unused( h, x264_frame_pop( h->frames.reference ) );
838+ x264_frame_push_unused( h, x264_frame_shift( &h->frames.reference[ref] ) );
839 h->b_ref_reorder[0] = 1;
840- break;
841+ ref--;
842 }
843 }
844
845- /* Prepare to room in the dpb for the delayed display time of the later b-frame's */
846- h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
847+ /* Prepare room in the dpb for the delayed display time of the later b-frames */
848+ if( h->param.i_bframe_pyramid )
849+ h->sh.i_mmco_remove_from_end = X264_MAX( ref + 2 - h->frames.i_max_dpb, 0 );
850 }
851
852 static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_global_qp )
853@@ -2319,14 +2326,19 @@ int x264_encoder_encode( x264_t *h,
854
855 if( h->fenc->b_keyframe )
856 {
857- h->frames.i_last_keyframe = h->fenc->i_frame;
858+ h->frames.i_last_coded_keyframe = h->fenc->i_frame;
859 if( h->fenc->i_type == X264_TYPE_IDR )
860+ {
861 h->i_frame_num = 0;
862+ h->frames.i_last_idr = h->fenc->i_frame;
863+ }
864 }
865 h->sh.i_mmco_command_count =
866 h->sh.i_mmco_remove_from_end = 0;
867 h->b_ref_reorder[0] =
868 h->b_ref_reorder[1] = 0;
869+ h->fdec->i_poc =
870+ h->fenc->i_poc = 2 * ( h->fenc->i_frame - X264_MAX( h->frames.i_last_idr, 0 ) );
871
872 /* ------------------- Setup frame context ----------------------------- */
873 /* 5: Init data dependent of frame type */
874@@ -2337,6 +2349,7 @@ int x264_encoder_encode( x264_t *h,
875 i_nal_ref_idc = NAL_PRIORITY_HIGHEST;
876 h->sh.i_type = SLICE_TYPE_I;
877 x264_reference_reset( h );
878+ h->frames.i_poc_last_open_gop = -1;
879 }
880 else if( h->fenc->i_type == X264_TYPE_I )
881 {
882@@ -2344,6 +2357,8 @@ int x264_encoder_encode( x264_t *h,
883 i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
884 h->sh.i_type = SLICE_TYPE_I;
885 x264_reference_hierarchy_reset( h );
886+ if( h->param.b_open_gop )
887+ h->frames.i_poc_last_open_gop = h->fenc->b_keyframe ? h->fenc->i_poc : -1;
888 }
889 else if( h->fenc->i_type == X264_TYPE_P )
890 {
891@@ -2351,6 +2366,7 @@ int x264_encoder_encode( x264_t *h,
892 i_nal_ref_idc = NAL_PRIORITY_HIGH; /* Not completely true but for now it is (as all I/P are kept as ref)*/
893 h->sh.i_type = SLICE_TYPE_P;
894 x264_reference_hierarchy_reset( h );
895+ h->frames.i_poc_last_open_gop = -1;
896 }
897 else if( h->fenc->i_type == X264_TYPE_BREF )
898 {
899@@ -2366,8 +2382,6 @@ int x264_encoder_encode( x264_t *h,
900 h->sh.i_type = SLICE_TYPE_B;
901 }
902
903- h->fdec->i_poc =
904- h->fenc->i_poc = 2 * (h->fenc->i_frame - h->frames.i_last_keyframe);
905 h->fdec->i_type = h->fenc->i_type;
906 h->fdec->i_frame = h->fenc->i_frame;
907 h->fenc->b_kept_as_ref =
908@@ -2484,7 +2498,7 @@ int x264_encoder_encode( x264_t *h,
909
910 if( h->fenc->i_type != X264_TYPE_IDR )
911 {
912- int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
913+ int time_to_recovery = h->param.b_open_gop ? 0 : X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
914 x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
915 x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
916 x264_nal_end( h );
917diff --git a/encoder/lookahead.c b/encoder/lookahead.c
918index f0af216..6994829 100644
919--- a/encoder/lookahead.c
920+++ b/encoder/lookahead.c
921@@ -131,7 +131,7 @@ int x264_lookahead_init( x264_t *h, int i_slicetype_length )
922 for( int i = 0; i < h->param.i_threads; i++ )
923 h->thread[i]->lookahead = look;
924
925- look->i_last_keyframe = - h->param.i_keyint_max;
926+ look->i_last_coded_keyframe = - h->param.i_keyint_max;
927 look->b_analyse_keyframe = (h->param.rc.b_mb_tree || (h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead))
928 && !h->param.rc.b_stat_read;
929 look->i_slicetype_length = i_slicetype_length;
930diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
931index 2c05ad7..f30df22 100644
932--- a/encoder/ratecontrol.c
933+++ b/encoder/ratecontrol.c
934@@ -704,6 +704,7 @@ int x264_ratecontrol_new( x264_t *h )
935 CMP_OPT_FIRST_PASS( "b_pyramid", h->param.i_bframe_pyramid );
936 CMP_OPT_FIRST_PASS( "intra_refresh", h->param.b_intra_refresh );
937 CMP_OPT_FIRST_PASS( "keyint", h->param.i_keyint_max );
938+ CMP_OPT_FIRST_PASS( "open_gop", h->param.b_open_gop );
939
940 if( strstr( opts, "qp=0" ) && h->param.rc.i_rc_method == X264_RC_ABR )
941 x264_log( h, X264_LOG_WARNING, "1st pass was lossless, bitrate prediction will be inaccurate\n" );
942diff --git a/encoder/slicetype.c b/encoder/slicetype.c
943index 60f3a24..0762c99 100644
944--- a/encoder/slicetype.c
945+++ b/encoder/slicetype.c
946@@ -981,7 +981,7 @@ static int scenecut_internal( x264_t *h, x264_mb_analysis_t *a, x264_frame_t **f
947 int icost = frame->i_cost_est[0][0];
948 int pcost = frame->i_cost_est[p1-p0][0];
949 float f_bias;
950- int i_gop_size = frame->i_frame - h->lookahead->i_last_keyframe;
951+ int i_gop_size = frame->i_frame - h->lookahead->i_last_coded_keyframe;
952 float f_thresh_max = h->param.i_scenecut_threshold / 100.0;
953 /* magic numbers pulled out of thin air */
954 float f_thresh_min = f_thresh_max * h->param.i_keyint_min
955@@ -1058,7 +1058,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
956 {
957 x264_mb_analysis_t a;
958 x264_frame_t *frames[X264_LOOKAHEAD_MAX+3] = { NULL, };
959- int num_frames, orig_num_frames, keyint_limit, idr_frame_type, framecnt;
960+ int num_frames, orig_num_frames, keyint_limit, framecnt;
961 int i_mb_count = NUM_MBS;
962 int cost1p0, cost2p0, cost1b1, cost2p1;
963 int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
964@@ -1076,11 +1076,10 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
965 if( !framecnt )
966 return;
967
968- keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_keyframe - 1;
969+ keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_coded_keyframe - 1;
970 orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
971
972 x264_lowres_context_init( h, &a );
973- idr_frame_type = frames[1]->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_min ? X264_TYPE_IDR : X264_TYPE_I;
974
975 /* This is important psy-wise: if we have a non-scenecut keyframe,
976 * there will be significant visual artifacts if the frames just before
977@@ -1092,12 +1091,12 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
978 {
979 frames[1]->i_type = X264_TYPE_P;
980 if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
981- frames[1]->i_type = idr_frame_type;
982+ frames[1]->i_type = X264_TYPE_I;
983 return;
984 }
985 else if( num_frames == 0 )
986 {
987- frames[1]->i_type = idr_frame_type;
988+ frames[1]->i_type = X264_TYPE_I;
989 return;
990 }
991
992@@ -1106,7 +1105,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
993 int reset_start;
994 if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
995 {
996- frames[1]->i_type = idr_frame_type;
997+ frames[1]->i_type = X264_TYPE_I;
998 return;
999 }
1000
1001@@ -1210,15 +1209,19 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1002
1003 /* Enforce keyframe limit. */
1004 if( !h->param.b_intra_refresh )
1005- for( int j = 0; j < num_frames; j++ )
1006+ for( int i = keyint_limit+1; i <= num_frames; i += h->param.i_keyint_max )
1007 {
1008- if( ((j-keyint_limit) % h->param.i_keyint_max) == 0 )
1009+ int j = i;
1010+ if( h->param.b_open_gop )
1011 {
1012- if( j && h->param.i_keyint_max > 1 )
1013- frames[j]->i_type = X264_TYPE_P;
1014- frames[j+1]->i_type = X264_TYPE_IDR;
1015- reset_start = X264_MIN( reset_start, j+2 );
1016+ while( IS_X264_TYPE_B( frames[i]->i_type ) )
1017+ i++;
1018+ while( IS_X264_TYPE_B( frames[j-1]->i_type ) )
1019+ j--;
1020 }
1021+ frames[i]->i_type = X264_TYPE_I;
1022+ reset_start = X264_MIN( reset_start, i+1 );
1023+ i = j;
1024 }
1025
1026 if( h->param.rc.i_vbv_buffer_size )
1027@@ -1303,18 +1306,40 @@ void x264_slicetype_decide( x264_t *h )
1028 frm->i_frame, x264_b_pyramid_names[h->param.i_bframe_pyramid], h->param.i_frame_reference );
1029 }
1030
1031+ if( frm->i_type == X264_TYPE_KEYFRAME )
1032+ frm->i_type = h->param.b_open_gop ? X264_TYPE_I : X264_TYPE_IDR;
1033+
1034 /* Limit GOP size */
1035- if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_keyframe >= h->param.i_keyint_max )
1036+ if( (!h->param.b_intra_refresh || frm->i_frame == 0) && frm->i_frame - h->lookahead->i_last_coded_keyframe >= h->param.i_keyint_max )
1037+ {
1038+ if( frm->i_type == X264_TYPE_AUTO || frm->i_type == X264_TYPE_I )
1039+ frm->i_type = h->param.b_open_gop && h->lookahead->i_last_coded_keyframe >= 0 ? X264_TYPE_I : X264_TYPE_IDR;
1040+ int warn = frm->i_type != X264_TYPE_IDR;
1041+ if( warn && h->param.b_open_gop )
1042+ {
1043+ /* if this minigop ends with i, it's not a violation */
1044+ int j = bframes;
1045+ while( IS_X264_TYPE_B( h->lookahead->next.list[j]->i_type ) )
1046+ j++;
1047+ warn = h->lookahead->next.list[j]->i_type != X264_TYPE_I && h->lookahead->next.list[j]->i_type != X264_TYPE_KEYFRAME;
1048+ }
1049+ if( warn )
1050+ x264_log( h, X264_LOG_WARNING, "specified frame type (%d) at %d is not compatible with keyframe interval\n", frm->i_type, frm->i_frame );
1051+ }
1052+ if( frm->i_type == X264_TYPE_I && frm->i_frame - h->lookahead->i_last_coded_keyframe >= h->param.i_keyint_min )
1053 {
1054- if( frm->i_type == X264_TYPE_AUTO )
1055+ if( h->param.b_open_gop )
1056+ {
1057+ h->lookahead->i_last_coded_keyframe = frm->i_frame - bframes;
1058+ frm->b_keyframe = 1;
1059+ }
1060+ else
1061 frm->i_type = X264_TYPE_IDR;
1062- if( frm->i_type != X264_TYPE_IDR )
1063- x264_log( h, X264_LOG_WARNING, "specified frame type (%d) is not compatible with keyframe interval\n", frm->i_type );
1064 }
1065 if( frm->i_type == X264_TYPE_IDR )
1066 {
1067 /* Close GOP */
1068- h->lookahead->i_last_keyframe = frm->i_frame;
1069+ h->lookahead->i_last_coded_keyframe = frm->i_frame;
1070 frm->b_keyframe = 1;
1071 if( bframes > 0 )
1072 {
1073diff --git a/x264.c b/x264.c
1074index a124083..eba72c5 100644
1075--- a/x264.c
1076+++ b/x264.c
1077@@ -380,6 +380,8 @@ static void Help( x264_param_t *defaults, int longhelp )
1078 " - strict: Strictly hierarchical pyramid\n"
1079 " - normal: Non-strict (not Blu-ray compatible)\n",
1080 strtable_lookup( x264_b_pyramid_names, defaults->i_bframe_pyramid ) );
1081+ H1( " --open-gop Use recovery points to close GOPs\n"
1082+ " Only available with b-frames\n" );
1083 H1( " --no-cabac Disable CABAC\n" );
1084 H1( " -r, --ref <integer> Number of reference frames [%d]\n", defaults->i_frame_reference );
1085 H1( " --no-deblock Disable loop filter\n" );
1086@@ -441,7 +443,8 @@ static void Help( x264_param_t *defaults, int longhelp )
1087 " or b=<float> (bitrate multiplier)\n" );
1088 H2( " --qpfile <string> Force frametypes and QPs for some or all frames\n"
1089 " Format of each line: framenumber frametype QP\n"
1090- " QP of -1 lets x264 choose. Frametypes: I,i,P,B,b.\n"
1091+ " QP of -1 lets x264 choose. Frametypes: I,i,K,P,B,b.\n"
1092+ " K=<I or i> depending on open-gop setting\n"
1093 " QPs are restricted by qpmin/qpmax.\n" );
1094 H1( "\n" );
1095 H1( "Analysis:\n" );
1096@@ -627,6 +630,7 @@ static struct option long_options[] =
1097 { "no-b-adapt", no_argument, NULL, 0 },
1098 { "b-bias", required_argument, NULL, 0 },
1099 { "b-pyramid", required_argument, NULL, 0 },
1100+ { "open-gop", no_argument, NULL, 0 },
1101 { "min-keyint", required_argument, NULL, 'i' },
1102 { "keyint", required_argument, NULL, 'I' },
1103 { "intra-refresh", no_argument, NULL, 0 },
1104@@ -1304,6 +1308,7 @@ static void parse_qpfile( cli_opt_t *opt, x264_picture_t *pic, int i_frame )
1105 pic->i_qpplus1 = qp+1;
1106 if ( type == 'I' ) pic->i_type = X264_TYPE_IDR;
1107 else if( type == 'i' ) pic->i_type = X264_TYPE_I;
1108+ else if( type == 'K' ) pic->i_type = X264_TYPE_KEYFRAME;
1109 else if( type == 'P' ) pic->i_type = X264_TYPE_P;
1110 else if( type == 'B' ) pic->i_type = X264_TYPE_BREF;
1111 else if( type == 'b' ) pic->i_type = X264_TYPE_B;
1112diff --git a/x264.h b/x264.h
1113index 9cd4600..b1402c9 100644
1114--- a/x264.h
1115+++ b/x264.h
1116@@ -35,7 +35,7 @@
1117
1118 #include <stdarg.h>
1119
1120-#define X264_BUILD 98
1121+#define X264_BUILD 99
1122
1123 /* x264_t:
1124 * opaque handler for encoder */
1125@@ -138,6 +138,7 @@ static const char * const x264_nal_hrd_names[] = { "none", "vbr", "cbr", 0 };
1126 #define X264_TYPE_P 0x0003
1127 #define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */
1128 #define X264_TYPE_B 0x0005
1129+#define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on b_open_gop option */
1130 #define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR)
1131 #define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
1132
1133@@ -221,6 +222,7 @@ typedef struct x264_param_t
1134 int i_bframe_adaptive;
1135 int i_bframe_bias;
1136 int i_bframe_pyramid; /* Keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal */
1137+ int b_open_gop;
1138
1139 int b_deblocking_filter;
1140 int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
1141--
11421.7.0.4
1143
1144
1145From 7ce61c25c289ef641349c2f4295a4f61dd173557 Mon Sep 17 00:00:00 2001
1146From: Jason Garrett-Glaser <darkshikari@gmail.com>
1147Date: Thu, 17 Jun 2010 14:50:07 -0700
1148Subject: [PATCH 4/9] Lookaheadless MB-tree support
1149 Uses past motion information instead of future data from the lookahead.
1150 Not as accurate, but better than nothing in zero-latency compression when a lookahead isn't available.
1151 Currently resets on keyframes, so only available if intra-refresh is set, to avoid pops on non-scenecut keyframes.
1152
1153Also slightly modify encoding presets: disable rc-lookahead in the fastest presets.
1154Enable MB-tree in "veryfast", albeit with a very short lookahead.
1155---
1156 common/common.c | 4 +++-
1157 encoder/encoder.c | 7 ++++++-
1158 encoder/slicetype.c | 48 ++++++++++++++++++++++++++++++++++--------------
1159 x264.c | 14 +++++++-------
1160 4 files changed, 50 insertions(+), 23 deletions(-)
1161
1162diff --git a/common/common.c b/common/common.c
1163index 5ccd541..9e86f93 100644
1164--- a/common/common.c
1165+++ b/common/common.c
1166@@ -184,6 +184,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1167 param->rc.b_mb_tree = 0;
1168 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1169 param->analyse.b_weighted_bipred = 0;
1170+ param->rc.i_lookahead = 0;
1171 }
1172 else if( !strcasecmp( preset, "superfast" ) )
1173 {
1174@@ -195,6 +196,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1175 param->analyse.i_trellis = 0;
1176 param->rc.b_mb_tree = 0;
1177 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1178+ param->rc.i_lookahead = 0;
1179 }
1180 else if( !strcasecmp( preset, "veryfast" ) )
1181 {
1182@@ -203,8 +205,8 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset )
1183 param->i_frame_reference = 1;
1184 param->analyse.b_mixed_references = 0;
1185 param->analyse.i_trellis = 0;
1186- param->rc.b_mb_tree = 0;
1187 param->analyse.i_weighted_pred = X264_WEIGHTP_NONE;
1188+ param->rc.i_lookahead = 10;
1189 }
1190 else if( !strcasecmp( preset, "faster" ) )
1191 {
1192diff --git a/encoder/encoder.c b/encoder/encoder.c
1193index 3e7f227..2b0e017 100644
1194--- a/encoder/encoder.c
1195+++ b/encoder/encoder.c
1196@@ -620,8 +620,13 @@ static int x264_validate_parameters( x264_t *h )
1197 }
1198
1199 h->param.rc.f_qcompress = x264_clip3f( h->param.rc.f_qcompress, 0.0, 1.0 );
1200- if( !h->param.rc.i_lookahead || h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
1201+ if( h->param.i_keyint_max == 1 || h->param.rc.f_qcompress == 1 )
1202 h->param.rc.b_mb_tree = 0;
1203+ if( !h->param.rc.i_lookahead && !h->param.b_intra_refresh && h->param.rc.b_mb_tree )
1204+ {
1205+ x264_log( h, X264_LOG_WARNING, "lookaheadless mb-tree requires intra refresh\n" );
1206+ h->param.rc.b_mb_tree = 0;
1207+ }
1208 if( h->param.rc.b_stat_read )
1209 h->param.rc.i_lookahead = 0;
1210 #if HAVE_PTHREAD
1211diff --git a/encoder/slicetype.c b/encoder/slicetype.c
1212index 0762c99..83948fc 100644
1213--- a/encoder/slicetype.c
1214+++ b/encoder/slicetype.c
1215@@ -734,7 +734,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
1216 }
1217 }
1218
1219- if( h->param.rc.i_vbv_buffer_size && referenced )
1220+ if( h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead && referenced )
1221 x264_macroblock_tree_finish( h, frames[b], b == p1 ? b - p0 : 0 );
1222 }
1223
1224@@ -743,7 +743,8 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1225 int idx = !b_intra;
1226 int last_nonb, cur_nonb = 1;
1227 int bframes = 0;
1228- int i = num_frames - 1;
1229+ int i = num_frames;
1230+
1231 if( b_intra )
1232 x264_slicetype_frame_cost( h, a, frames, 0, 0, 0, 0 );
1233
1234@@ -751,10 +752,25 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1235 i--;
1236 last_nonb = i;
1237
1238- if( last_nonb < idx )
1239- return;
1240
1241- memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1242+ if( !h->param.rc.i_lookahead )
1243+ {
1244+ if( b_intra )
1245+ {
1246+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1247+ memcpy( frames[0]->f_qp_offset, frames[0]->f_qp_offset_aq, h->mb.i_mb_count * sizeof(float) );
1248+ return;
1249+ }
1250+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
1251+ memset( frames[0]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1252+ }
1253+ else
1254+ {
1255+ if( last_nonb < idx )
1256+ return;
1257+ memset( frames[last_nonb]->i_propagate_cost, 0, h->mb.i_mb_count * sizeof(uint16_t) );
1258+ }
1259+
1260 while( i-- > idx )
1261 {
1262 cur_nonb = i;
1263@@ -796,6 +812,12 @@ static void x264_macroblock_tree( x264_t *h, x264_mb_analysis_t *a, x264_frame_t
1264 last_nonb = cur_nonb;
1265 }
1266
1267+ if( !h->param.rc.i_lookahead )
1268+ {
1269+ x264_macroblock_tree_propagate( h, frames, 0, last_nonb, last_nonb, 1 );
1270+ XCHG( uint16_t*, frames[last_nonb]->i_propagate_cost, frames[0]->i_propagate_cost );
1271+ }
1272+
1273 x264_macroblock_tree_finish( h, frames[last_nonb], last_nonb );
1274 if( h->param.i_bframe_pyramid && bframes > 1 && !h->param.rc.i_vbv_buffer_size )
1275 x264_macroblock_tree_finish( h, frames[last_nonb+(bframes+1)/2], 0 );
1276@@ -1062,6 +1084,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1277 int i_mb_count = NUM_MBS;
1278 int cost1p0, cost2p0, cost1b1, cost2p1;
1279 int i_max_search = X264_MIN( h->lookahead->next.i_size, X264_LOOKAHEAD_MAX );
1280+ int vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;
1281 if( h->param.b_deterministic )
1282 i_max_search = X264_MIN( i_max_search, h->lookahead->i_slicetype_length + !keyframe );
1283
1284@@ -1074,7 +1097,11 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1285 frames[framecnt+1] = h->lookahead->next.list[framecnt];
1286
1287 if( !framecnt )
1288+ {
1289+ if( h->param.rc.b_mb_tree )
1290+ x264_macroblock_tree( h, &a, frames, 0, keyframe );
1291 return;
1292+ }
1293
1294 keyint_limit = h->param.i_keyint_max - frames[0]->i_frame + h->lookahead->i_last_coded_keyframe - 1;
1295 orig_num_frames = num_frames = h->param.b_intra_refresh ? framecnt : X264_MIN( framecnt, keyint_limit );
1296@@ -1085,15 +1112,8 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1297 * there will be significant visual artifacts if the frames just before
1298 * go down in quality due to being referenced less, despite it being
1299 * more RD-optimal. */
1300- if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || h->param.rc.i_vbv_buffer_size )
1301+ if( (h->param.analyse.b_psy && h->param.rc.b_mb_tree) || vbv_lookahead )
1302 num_frames = framecnt;
1303- else if( num_frames == 1 )
1304- {
1305- frames[1]->i_type = X264_TYPE_P;
1306- if( h->param.i_scenecut_threshold && scenecut( h, &a, frames, 0, 1, 1, orig_num_frames ) )
1307- frames[1]->i_type = X264_TYPE_I;
1308- return;
1309- }
1310 else if( num_frames == 0 )
1311 {
1312 frames[1]->i_type = X264_TYPE_I;
1313@@ -1224,7 +1244,7 @@ void x264_slicetype_analyse( x264_t *h, int keyframe )
1314 i = j;
1315 }
1316
1317- if( h->param.rc.i_vbv_buffer_size )
1318+ if( vbv_lookahead )
1319 x264_vbv_lookahead( h, &a, frames, num_frames, keyframe );
1320
1321 /* Restore frametypes for all frames that haven't actually been decided yet. */
1322diff --git a/x264.c b/x264.c
1323index eba72c5..4265a3b 100644
1324--- a/x264.c
1325+++ b/x264.c
1326@@ -285,16 +285,16 @@ static void Help( x264_param_t *defaults, int longhelp )
1327 " --no-8x8dct --aq-mode 0 --b-adapt 0\n"
1328 " --bframes 0 --no-cabac --no-deblock\n"
1329 " --no-mbtree --me dia --no-mixed-refs\n"
1330- " --partitions none --ref 1 --scenecut 0\n"
1331- " --subme 0 --trellis 0 --no-weightb\n"
1332- " --weightp 0\n"
1333+ " --partitions none --rc-lookahead 0 --ref 1\n"
1334+ " --scenecut 0 --subme 0 --trellis 0\n"
1335+ " --no-weightb --weightp 0\n"
1336 " - superfast:\n"
1337 " --no-mbtree --me dia --no-mixed-refs\n"
1338- " --partitions i8x8,i4x4 --ref 1\n"
1339- " --subme 1 --trellis 0 --weightp 0\n"
1340+ " --partitions i8x8,i4x4 --rc-lookahead 0\n"
1341+ " --ref 1 --subme 1 --trellis 0 --weightp 0\n"
1342 " - veryfast:\n"
1343- " --no-mbtree --no-mixed-refs --ref 1\n"
1344- " --subme 2 --trellis 0 --weightp 0\n"
1345+ " --no-mixed-refs --rc-lookahead 10\n"
1346+ " --ref 1 --subme 2 --trellis 0 --weightp 0\n"
1347 " - faster:\n"
1348 " --no-mixed-refs --rc-lookahead 20\n"
1349 " --ref 2 --subme 4 --weightp 1\n"
1350--
13511.7.0.4
1352
1353
1354From f0505f9c3c9c4d6e7643cb878ea72192abef2420 Mon Sep 17 00:00:00 2001
1355From: Anton Mitrofanov <BugMaster@narod.ru>
1356Date: Sat, 19 Jun 2010 01:44:56 +0400
1357Subject: [PATCH 5/9] Fix SIGPIPEs caused by is_regular_file checks
1358 Check to see if input file is a pipe without opening it.
1359
1360---
1361 common/osdep.h | 10 +++++++++-
1362 x264.c | 1 +
1363 2 files changed, 10 insertions(+), 1 deletions(-)
1364
1365diff --git a/common/osdep.h b/common/osdep.h
1366index b1b357c..b3a8cd6 100644
1367--- a/common/osdep.h
1368+++ b/common/osdep.h
1369@@ -290,7 +290,15 @@ static inline uint8_t x264_is_regular_file( FILE *filehandle )
1370 {
1371 struct stat file_stat;
1372 if( fstat( fileno( filehandle ), &file_stat ) )
1373- return 0;
1374+ return -1;
1375+ return S_ISREG( file_stat.st_mode );
1376+}
1377+
1378+static inline uint8_t x264_is_regular_file_path( const char *filename )
1379+{
1380+ struct stat file_stat;
1381+ if( stat( filename, &file_stat ) )
1382+ return -1;
1383 return S_ISREG( file_stat.st_mode );
1384 }
1385
1386diff --git a/x264.c b/x264.c
1387index 4265a3b..25609a3 100644
1388--- a/x264.c
1389+++ b/x264.c
1390@@ -810,6 +810,7 @@ static int select_input( const char *demuxer, char *used_demuxer, char *filename
1391 int b_auto = !strcasecmp( demuxer, "auto" );
1392 if( !b_regular && b_auto )
1393 ext = "yuv";
1394+ b_regular = b_regular && x264_is_regular_file_path( filename );
1395 if( b_regular )
1396 {
1397 FILE *f = fopen( filename, "r" );
1398--
13991.7.0.4
1400
1401
1402From 99a6182a8232083a641cc0423f56407e0589c313 Mon Sep 17 00:00:00 2001
1403From: Yusuke Nakamura <muken.the.vfrmaniac@gmail.com>
1404Date: Fri, 18 Jun 2010 14:57:52 -0700
1405Subject: [PATCH 6/9] Properly close files on errors
1406
1407---
1408 encoder/encoder.c | 1 +
1409 input/avs.c | 1 +
1410 output/mp4.c | 1 +
1411 3 files changed, 3 insertions(+), 0 deletions(-)
1412
1413diff --git a/encoder/encoder.c b/encoder/encoder.c
1414index 2b0e017..cd65da2 100644
1415--- a/encoder/encoder.c
1416+++ b/encoder/encoder.c
1417@@ -1137,6 +1137,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
1418 else if( !x264_is_regular_file( f ) )
1419 {
1420 x264_log( h, X264_LOG_ERROR, "dump_yuv: incompatible with non-regular file %s\n", h->param.psz_dump_yuv );
1421+ fclose( f );
1422 goto fail;
1423 }
1424 fclose( f );
1425diff --git a/input/avs.c b/input/avs.c
1426index 07add40..849c465 100644
1427--- a/input/avs.c
1428+++ b/input/avs.c
1429@@ -134,6 +134,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle, video_info_t *info, c
1430 else if( !x264_is_regular_file( fh ) )
1431 {
1432 fprintf( stderr, "avs [error]: AVS input is incompatible with non-regular file `%s'\n", psz_filename );
1433+ fclose( fh );
1434 return -1;
1435 }
1436 fclose( fh );
1437diff --git a/output/mp4.c b/output/mp4.c
1438index 0e3c2fc..9b35a2f 100644
1439--- a/output/mp4.c
1440+++ b/output/mp4.c
1441@@ -166,6 +166,7 @@ static int open_file( char *psz_filename, hnd_t *p_handle )
1442 else if( !x264_is_regular_file( fh ) )
1443 {
1444 fprintf( stderr, "mp4 [error]: MP4 output is incompatible with non-regular file `%s'\n", psz_filename );
1445+ fclose( fh );
1446 return -1;
1447 }
1448 fclose( fh );
1449--
14501.7.0.4
1451
1452
1453From 9dda85ea508aa5f3e5d3d44c80fee7b33caaea49 Mon Sep 17 00:00:00 2001
1454From: Jason Garrett-Glaser <darkshikari@gmail.com>
1455Date: Fri, 18 Jun 2010 13:58:11 -0700
1456Subject: [PATCH 7/9] sse4 and ssse3 versions of some intra_sad functions
1457
1458---
1459 common/pixel.c | 5 ++
1460 common/x86/pixel.h | 2 +
1461 common/x86/sad-a.asm | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++
1462 3 files changed, 114 insertions(+), 0 deletions(-)
1463
1464diff --git a/common/pixel.c b/common/pixel.c
1465index a8cb1df..7fcc91a 100644
1466--- a/common/pixel.c
1467+++ b/common/pixel.c
1468@@ -856,6 +856,11 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
1469 }
1470 pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_sse4;
1471 pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_sse4;
1472+ pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse4;
1473+
1474+ /* Only faster on Nehalem */
1475+ if( cpu&X264_CPU_SSE42 )
1476+ pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_ssse3;
1477 }
1478 #endif //HAVE_MMX
1479
1480diff --git a/common/x86/pixel.h b/common/x86/pixel.h
1481index 9bba683..b1b916d 100644
1482--- a/common/x86/pixel.h
1483+++ b/common/x86/pixel.h
1484@@ -80,6 +80,7 @@ DECL_PIXELS( uint64_t, hadamard_ac, sse4, ( uint8_t *pix, int i_stride ))
1485 void x264_intra_satd_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
1486 void x264_intra_satd_x3_4x4_ssse3 ( uint8_t *, uint8_t *, int * );
1487 void x264_intra_sad_x3_4x4_mmxext ( uint8_t *, uint8_t *, int * );
1488+void x264_intra_sad_x3_4x4_sse4 ( uint8_t *, uint8_t *, int * );
1489 void x264_intra_satd_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
1490 void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * );
1491 void x264_intra_sad_x3_8x8c_mmxext ( uint8_t *, uint8_t *, int * );
1492@@ -93,6 +94,7 @@ void x264_intra_sa8d_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
1493 void x264_intra_sa8d_x3_8x8_sse2 ( uint8_t *, uint8_t *, int * );
1494 void x264_intra_sa8d_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
1495 void x264_intra_sad_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
1496+void x264_intra_sad_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
1497 void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
1498 void x264_intra_sa8d_x3_8x8_core_sse2 ( uint8_t *, int16_t [2][8], int * );
1499 void x264_intra_sa8d_x3_8x8_core_ssse3 ( uint8_t *, int16_t [2][8], int * );
1500diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm
1501index 72c1789..10a365c 100644
1502--- a/common/x86/sad-a.asm
1503+++ b/common/x86/sad-a.asm
1504@@ -26,6 +26,19 @@
1505 %include "x86inc.asm"
1506 %include "x86util.asm"
1507
1508+SECTION_RODATA
1509+
1510+h4x4_pred_shuf: db 3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15
1511+h4x4_pred_shuf2: db 3,7,11,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1512+h8x8_pred_shuf: times 8 db 1
1513+ times 8 db 0
1514+ times 8 db 3
1515+ times 8 db 2
1516+ times 8 db 5
1517+ times 8 db 4
1518+ times 8 db 7
1519+ times 8 db 6
1520+
1521 SECTION .text
1522
1523 cextern pb_3
1524@@ -303,6 +316,40 @@ cglobal intra_sad_x3_4x4_mmxext, 3,3
1525 movd [r2+4], mm1 ;H prediction cost
1526 RET
1527
1528+cglobal intra_sad_x3_4x4_sse4, 3,3
1529+ movd xmm4, [r1+FDEC_STRIDE*0-4]
1530+ pinsrd xmm4, [r1+FDEC_STRIDE*1-4], 1
1531+ pinsrd xmm4, [r1+FDEC_STRIDE*2-4], 2
1532+ pinsrd xmm4, [r1+FDEC_STRIDE*3-4], 3
1533+ movd xmm2, [r1-FDEC_STRIDE]
1534+ pxor xmm3, xmm3
1535+ movdqa xmm5, xmm4
1536+ pshufb xmm4, [h4x4_pred_shuf2] ; EFGH
1537+ pshufb xmm5, [h4x4_pred_shuf] ; EEEEFFFFGGGGHHHH
1538+ pshufd xmm0, xmm2, 0 ; ABCDABCDABCDABCD
1539+ punpckldq xmm2, xmm4 ; ABCDEFGH
1540+ psadbw xmm2, xmm3
1541+ movd xmm1, [r0+FENC_STRIDE*0]
1542+ pinsrd xmm1, [r0+FENC_STRIDE*1], 1
1543+ pinsrd xmm1, [r0+FENC_STRIDE*2], 2
1544+ pinsrd xmm1, [r0+FENC_STRIDE*3], 3
1545+ psadbw xmm0, xmm1
1546+ psadbw xmm5, xmm1
1547+ psraw xmm2, 2
1548+ pavgw xmm2, xmm3
1549+ pshufb xmm2, xmm3 ; DC prediction
1550+ movdqa xmm3, xmm0
1551+ punpcklqdq xmm0, xmm5
1552+ punpckhqdq xmm3, xmm5
1553+ psadbw xmm2, xmm1
1554+ paddw xmm0, xmm3
1555+ movhlps xmm4, xmm2
1556+ packusdw xmm0, xmm0
1557+ paddw xmm2, xmm4
1558+ movq [r2], xmm0 ; V/H prediction costs
1559+ movd [r2+8], xmm2 ; DC prediction cost
1560+ RET
1561+
1562 ;-----------------------------------------------------------------------------
1563 ; void intra_sad_x3_8x8( uint8_t *fenc, uint8_t edge[33], int res[3]);
1564 ;-----------------------------------------------------------------------------
1565@@ -370,6 +417,66 @@ cglobal intra_sad_x3_8x8_mmxext, 3,3
1566 movd [r2+8], m1
1567 RET
1568
1569+INIT_XMM
1570+cglobal intra_sad_x3_8x8_ssse3, 3,4,9
1571+%ifdef PIC
1572+ lea r11, [h8x8_pred_shuf]
1573+%define shuf r11
1574+%else
1575+%define shuf h8x8_pred_shuf
1576+%endif
1577+ movq m0, [r1+7] ; left pixels
1578+ movq m1, [r1+16] ; top pixels
1579+ pxor m2, m2
1580+ pxor m3, m3
1581+ psadbw m2, m0
1582+ psadbw m3, m1
1583+ paddw m2, m3
1584+ pxor m3, m3 ; V score accumulator
1585+ psraw m2, 3
1586+ pavgw m2, m3
1587+ punpcklqdq m1, m1 ; V prediction
1588+ pshufb m2, m3 ; DC prediction
1589+ pxor m4, m4 ; H score accumulator
1590+ pxor m5, m5 ; DC score accumulator
1591+ mov r3d, 6
1592+.loop:
1593+ movq m6, [r0+FENC_STRIDE*0]
1594+ movhps m6, [r0+FENC_STRIDE*1]
1595+ movdqa m7, m0
1596+ pshufb m7, [shuf+r3*8] ; H prediction
1597+%ifdef ARCH_X86_64
1598+ movdqa m8, m1
1599+ psadbw m7, m6
1600+ psadbw m8, m6
1601+ psadbw m6, m2
1602+ paddw m4, m7
1603+ paddw m3, m8
1604+ paddw m5, m6
1605+%else
1606+ psadbw m7, m6
1607+ paddw m4, m7
1608+ movdqa m7, m1
1609+ psadbw m7, m6
1610+ psadbw m6, m2
1611+ paddw m3, m7
1612+ paddw m5, m6
1613+%endif
1614+ add r0, FENC_STRIDE*2
1615+ sub r3d, 2
1616+ jge .loop
1617+
1618+ movhlps m0, m3
1619+ movhlps m1, m4
1620+ movhlps m2, m5
1621+ paddw m3, m0
1622+ paddw m4, m1
1623+ paddw m5, m2
1624+ movd [r2+0], m3
1625+ movd [r2+4], m4
1626+ movd [r2+8], m5
1627+ RET
1628+
1629 ;-----------------------------------------------------------------------------
1630 ; void intra_sad_x3_8x8c( uint8_t *fenc, uint8_t *fdec, int res[3] );
1631 ;-----------------------------------------------------------------------------
1632--
16331.7.0.4
1634
1635
1636From 4e190ca01a0717f9d4056213226b57eb4a6d1b69 Mon Sep 17 00:00:00 2001
1637From: Jason Garrett-Glaser <darkshikari@gmail.com>
1638Date: Sat, 19 Jun 2010 01:41:07 -0700
1639Subject: [PATCH 8/9] Improve 2-pass bitrate prediction
1640 Adapt based on distance to the end in bits, not in frames.
1641 Helps in videos with absurdly simple end sections, e.g. black frames.
1642
1643---
1644 encoder/ratecontrol.c | 12 +++++++++---
1645 1 files changed, 9 insertions(+), 3 deletions(-)
1646
1647diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
1648index f30df22..6668c18 100644
1649--- a/encoder/ratecontrol.c
1650+++ b/encoder/ratecontrol.c
1651@@ -2014,9 +2014,6 @@ static float rate_estimate_qscale( x264_t *h )
1652 double lmax = rcc->lmax[pict_type];
1653 int64_t diff;
1654 int64_t predicted_bits = total_bits;
1655- /* Adjust ABR buffer based on distance to the end of the video. */
1656- if( rcc->num_entries > h->i_frame )
1657- abr_buffer *= 0.5 * sqrt( rcc->num_entries - h->i_frame );
1658
1659 if( rcc->b_vbv )
1660 {
1661@@ -2042,6 +2039,15 @@ static float rate_estimate_qscale( x264_t *h )
1662 predicted_bits += (int64_t)(h->i_thread_frames - 1) * rcc->bitrate / rcc->fps;
1663 }
1664
1665+ /* Adjust ABR buffer based on distance to the end of the video. */
1666+ if( rcc->num_entries > h->i_frame )
1667+ {
1668+ double final_bits = rcc->entry[rcc->num_entries-1].expected_bits;
1669+ double video_pos = rce.expected_bits / final_bits;
1670+ double scale_factor = sqrt( (1 - video_pos) * rcc->num_entries );
1671+ abr_buffer *= 0.5 * X264_MAX( scale_factor, 0.5 );
1672+ }
1673+
1674 diff = predicted_bits - (int64_t)rce.expected_bits;
1675 q = rce.new_qscale;
1676 q /= x264_clip3f((double)(abr_buffer - diff) / abr_buffer, .5, 2);
1677--
16781.7.0.4
1679
1680
1681From 62e9a31ca2eca59f0d8f54c104d8f7229af4f78c Mon Sep 17 00:00:00 2001
1682From: Jason Garrett-Glaser <darkshikari@gmail.com>
1683Date: Sat, 19 Jun 2010 03:27:33 -0700
1684Subject: [PATCH 9/9] Attempt to fix rounding errors in HRD
1685 In a staggering display of brain damage, the spec requires all HRD math to be done in infinite precision despite the output being of quite limited precision.
1686 Accordingly, convert buffer management to work in units of timescale.
1687 This should fix problems with accumulating rounding errors in HRD information.
1688
1689---
1690 common/common.c | 36 ++++++++++++++-----------
1691 common/common.h | 1 +
1692 encoder/encoder.c | 3 +-
1693 encoder/ratecontrol.c | 67 ++++++++++++++++++++++++++++++++-----------------
1694 encoder/ratecontrol.h | 2 +-
1695 5 files changed, 67 insertions(+), 42 deletions(-)
1696
1697diff --git a/common/common.c b/common/common.c
1698index 9e86f93..44fd68a 100644
1699--- a/common/common.c
1700+++ b/common/common.c
1701@@ -1084,24 +1084,28 @@ void x264_free( void *p )
1702 /****************************************************************************
1703 * x264_reduce_fraction:
1704 ****************************************************************************/
1705-void x264_reduce_fraction( uint32_t *n, uint32_t *d )
1706-{
1707- uint32_t a = *n;
1708- uint32_t b = *d;
1709- uint32_t c;
1710- if( !a || !b )
1711- return;
1712- c = a % b;
1713- while(c)
1714- {
1715- a = b;
1716- b = c;
1717- c = a % b;
1718- }
1719- *n /= b;
1720- *d /= b;
1721+#define REDUCE_FRACTION( name, type ) /* defines name(): divides *n and *d by their common divisor, for the given integer type */ \
1722+void name( type *n, type *d )\
1723+{ \
1724+ type a = *n; \
1725+ type b = *d; \
1726+ type c; \
1727+ if( !a || !b ) /* zero numerator or denominator: leave both values untouched */ \
1728+ return; \
1729+ c = a % b; /* Euclid's algorithm: when the loop ends, b holds gcd(*n, *d) */ \
1730+ while( c ) \
1731+ { \
1732+ a = b; \
1733+ b = c; \
1734+ c = a % b; \
1735+ } \
1736+ *n /= b; \
1737+ *d /= b; \
1738 }
1739
1740+REDUCE_FRACTION( x264_reduce_fraction, uint32_t )
1741+REDUCE_FRACTION( x264_reduce_fraction64, uint64_t )
1742+
1743 /****************************************************************************
1744 * x264_slurp_file:
1745 ****************************************************************************/
1746diff --git a/common/common.h b/common/common.h
1747index 19e5d32..fee9398 100644
1748--- a/common/common.h
1749+++ b/common/common.h
1750@@ -184,6 +184,7 @@ char *x264_param2string( x264_param_t *p, int b_res );
1751 void x264_log( x264_t *h, int i_level, const char *psz_fmt, ... );
1752
1753 void x264_reduce_fraction( uint32_t *n, uint32_t *d );
1754+void x264_reduce_fraction64( uint64_t *n, uint64_t *d );
1755 void x264_init_vlc_tables();
1756
1757 static ALWAYS_INLINE pixel x264_clip_pixel( int x )
1758diff --git a/encoder/encoder.c b/encoder/encoder.c
1759index cd65da2..c49ea1a 100644
1760--- a/encoder/encoder.c
1761+++ b/encoder/encoder.c
1762@@ -2593,8 +2593,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
1763 /* generate sei buffering period and insert it into place */
1764 if( h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
1765 {
1766- h->initial_cpb_removal_delay = x264_hrd_fullness( h );
1767-
1768+ x264_hrd_fullness( h );
1769 x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
1770 x264_sei_buffering_period_write( h, &h->out.bs );
1771 if( x264_nal_end( h ) )
1772diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c
1773index 6668c18..b37d9be 100644
1774--- a/encoder/ratecontrol.c
1775+++ b/encoder/ratecontrol.c
1776@@ -91,7 +91,7 @@ struct x264_ratecontrol_t
1777
1778 /* VBV stuff */
1779 double buffer_size;
1780- double buffer_fill_final; /* real buffer as of the last finished frame */
1781+ int64_t buffer_fill_final;
1782 double buffer_fill; /* planned buffer, if all in-progress frames hit their bit budget */
1783 double buffer_rate; /* # of bits added to buffer_fill after each frame */
1784 double vbv_max_rate; /* # of bits added to buffer_fill per second */
1785@@ -157,6 +157,7 @@ struct x264_ratecontrol_t
1786 int initial_cpb_removal_delay_offset;
1787 double nrt_first_access_unit; /* nominal removal time */
1788 double previous_cpb_final_arrival_time;
1789+ uint64_t hrd_multiply_denom;
1790 };
1791
1792
1793@@ -463,6 +464,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
1794 int vbv_max_bitrate = h->param.rc.i_vbv_max_bitrate * 1000;
1795
1796 /* Init HRD */
1797+ h->sps->vui.hrd.i_bit_rate_unscaled = vbv_max_bitrate;
1798+ h->sps->vui.hrd.i_cpb_size_unscaled = vbv_buffer_size;
1799 if( h->param.i_nal_hrd && b_init )
1800 {
1801 h->sps->vui.hrd.i_cpb_cnt = 1;
1802@@ -499,8 +502,8 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
1803
1804 #undef MAX_DURATION
1805
1806- vbv_buffer_size = X264_MIN( vbv_buffer_size, h->sps->vui.hrd.i_cpb_size_unscaled );
1807- vbv_max_bitrate = X264_MIN( vbv_max_bitrate, h->sps->vui.hrd.i_bit_rate_unscaled );
1808+ vbv_buffer_size = h->sps->vui.hrd.i_cpb_size_unscaled;
1809+ vbv_max_bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
1810 }
1811 else if( h->param.i_nal_hrd && !b_init )
1812 {
1813@@ -528,7 +531,7 @@ void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init )
1814 if( h->param.rc.f_vbv_buffer_init > 1. )
1815 h->param.rc.f_vbv_buffer_init = x264_clip3f( h->param.rc.f_vbv_buffer_init / h->param.rc.i_vbv_buffer_size, 0, 1 );
1816 h->param.rc.f_vbv_buffer_init = x264_clip3f( X264_MAX( h->param.rc.f_vbv_buffer_init, rc->buffer_rate / rc->buffer_size ), 0, 1);
1817- rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init;
1818+ rc->buffer_fill_final = rc->buffer_size * h->param.rc.f_vbv_buffer_init * h->sps->vui.i_time_scale;
1819 rc->b_vbv = 1;
1820 rc->b_vbv_min_rate = !rc->b_2pass
1821 && h->param.rc.i_rc_method == X264_RC_ABR
1822@@ -577,6 +580,23 @@ int x264_ratecontrol_new( x264_t *h )
1823
1824 x264_ratecontrol_init_reconfigurable( h, 1 );
1825
1826+ if( h->param.i_nal_hrd )
1827+ {
1828+ uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale;
1829+ uint64_t num = 180000;
1830+ x264_reduce_fraction64( &num, &denom );
1831+ rc->hrd_multiply_denom = 180000 / num;
1832+
1833+ double bits_required = log2( 180000 / rc->hrd_multiply_denom )
1834+ + log2( h->sps->vui.i_time_scale )
1835+ + log2( h->sps->vui.hrd.i_cpb_size_unscaled );
1836+ if( bits_required >= 63 )
1837+ {
1838+ x264_log( h, X264_LOG_ERROR, "HRD with very large timescale and bufsize not supported\n" );
1839+ return -1;
1840+ }
1841+ }
1842+
1843 if( rc->rate_tolerance < 0.01 )
1844 {
1845 x264_log(h, X264_LOG_WARNING, "bitrate tolerance too small, using .01\n");
1846@@ -1723,9 +1743,10 @@ static void update_predictor( predictor_t *p, double q, double var, double bits
1847 static int update_vbv( x264_t *h, int bits )
1848 {
1849 int filler = 0;
1850-
1851+ int bitrate = h->sps->vui.hrd.i_bit_rate_unscaled;
1852 x264_ratecontrol_t *rcc = h->rc;
1853 x264_ratecontrol_t *rct = h->thread[0]->rc;
1854+ uint64_t buffer_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
1855
1856 if( rcc->last_satd >= h->mb.i_mb_count )
1857 update_predictor( &rct->pred[h->sh.i_type], qp2qscale( rcc->qpa_rc ), rcc->last_satd, bits );
1858@@ -1733,48 +1754,48 @@ static int update_vbv( x264_t *h, int bits )
1859 if( !rcc->b_vbv )
1860 return filler;
1861
1862- rct->buffer_fill_final -= bits;
1863+ rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
1864
1865 if( rct->buffer_fill_final < 0 )
1866- x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, rct->buffer_fill_final );
1867+ x264_log( h, X264_LOG_WARNING, "VBV underflow (frame %d, %.0f bits)\n", h->i_frame, (double)rct->buffer_fill_final / h->sps->vui.i_time_scale );
1868 rct->buffer_fill_final = X264_MAX( rct->buffer_fill_final, 0 );
1869- rct->buffer_fill_final += rcc->buffer_rate;
1870+ rct->buffer_fill_final += (uint64_t)bitrate * h->sps->vui.i_num_units_in_tick * h->fenc->i_cpb_duration;
1871
1872- if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > rcc->buffer_size )
1873+ if( h->sps->vui.hrd.b_cbr_hrd && rct->buffer_fill_final > buffer_size )
1874 {
1875- filler = ceil( (rct->buffer_fill_final - rcc->buffer_size) / 8 );
1876- rct->buffer_fill_final -= X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
1877+ filler = ceil( (rct->buffer_fill_final - buffer_size) / (8. * h->sps->vui.i_time_scale) );
1878+ bits = X264_MAX( (FILLER_OVERHEAD - h->param.b_annexb), filler ) * 8;
1879+ rct->buffer_fill_final -= (uint64_t)bits * h->sps->vui.i_time_scale;
1880 }
1881 else
1882- rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, rcc->buffer_size );
1883+ rct->buffer_fill_final = X264_MIN( rct->buffer_fill_final, buffer_size );
1884
1885 return filler;
1886 }
1887
1888-int x264_hrd_fullness( x264_t *h )
1889+void x264_hrd_fullness( x264_t *h )
1890 {
1891 x264_ratecontrol_t *rct = h->thread[0]->rc;
1892- double cpb_bits = rct->buffer_fill_final;
1893- double bps = h->sps->vui.hrd.i_bit_rate_unscaled;
1894- double cpb_size = h->sps->vui.hrd.i_cpb_size_unscaled;
1895- double cpb_fullness = 90000.0*cpb_bits/bps;
1896+ uint64_t denom = (uint64_t)h->sps->vui.hrd.i_bit_rate_unscaled * h->sps->vui.i_time_scale / rct->hrd_multiply_denom;
1897+ uint64_t cpb_state = rct->buffer_fill_final; /* bits * time_scale; unsigned, so sign tests must use buffer_fill_final itself */
1898+ uint64_t cpb_size = (uint64_t)h->sps->vui.hrd.i_cpb_size_unscaled * h->sps->vui.i_time_scale;
1899+ uint64_t multiply_factor = 180000 / rct->hrd_multiply_denom;
1900
1901- if( cpb_bits < 0 || cpb_bits > cpb_size )
1902+ if( rct->buffer_fill_final < 0 || rct->buffer_fill_final > (int64_t)cpb_size ) /* signed compare: a negative fill is an underflow, not a wrapped overflow */
1903 {
1904 x264_log( h, X264_LOG_WARNING, "CPB %s: %.0lf bits in a %.0lf-bit buffer\n",
1905- cpb_bits < 0 ? "underflow" : "overflow", cpb_bits, cpb_size );
1906+ rct->buffer_fill_final < 0 ? "underflow" : "overflow", (double)rct->buffer_fill_final / h->sps->vui.i_time_scale, (double)cpb_size / h->sps->vui.i_time_scale ); /* dividing by time_scale yields bits */
1907 }
1908
1909- h->initial_cpb_removal_delay_offset = 90000.0*(cpb_size - cpb_bits)/bps;
1910-
1911- return x264_clip3f( cpb_fullness + 0.5, 0, 90000.0*cpb_size/bps ); // just lie if we are in a weird state
1912+ h->initial_cpb_removal_delay = (multiply_factor * cpb_state + denom) / (2*denom); /* round-to-nearest of 90000 * fill_bits / bitrate */
1913+ h->initial_cpb_removal_delay_offset = (multiply_factor * cpb_size + denom) / (2*denom) - h->initial_cpb_removal_delay; /* delay of a full buffer minus the delay above */
1914 }
1915
1916 // provisionally update VBV according to the planned size of all frames currently in progress
1917 static void update_vbv_plan( x264_t *h, int overhead )
1918 {
1919 x264_ratecontrol_t *rcc = h->rc;
1920- rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final;
1921+ rcc->buffer_fill = h->thread[0]->rc->buffer_fill_final / h->sps->vui.i_time_scale;
1922 if( h->i_thread_frames > 1 )
1923 {
1924 int j = h->rc - h->thread[0]->rc;
1925diff --git a/encoder/ratecontrol.h b/encoder/ratecontrol.h
1926index dd139eb..f39c070 100644
1927--- a/encoder/ratecontrol.h
1928+++ b/encoder/ratecontrol.h
1929@@ -47,6 +47,6 @@ int x264_rc_analyse_slice( x264_t *h );
1930 int x264_weighted_reference_duplicate( x264_t *h, int i_ref, const x264_weight_t *w );
1931 void x264_threads_distribute_ratecontrol( x264_t *h );
1932 void x264_threads_merge_ratecontrol( x264_t *h );
1933-int x264_hrd_fullness( x264_t *h );
1934+void x264_hrd_fullness( x264_t *h );
1935 #endif
1936
1937--
19381.7.0.4