· 7 years ago · Nov 15, 2018, 05:54 PM
1
2%% bare_jrnl.tex
3%% V1.3
4%% 2007/01/11
5%% by Michael Shell
6%% see http://www.michaelshell.org/
7%% for current contact information.
8%%
9%% This is a skeleton file demonstrating the use of IEEEtran.cls
10%% (requires IEEEtran.cls version 1.7 or later) with an IEEE journal paper.
11%%
12%% Support sites:
13%% http://www.michaelshell.org/tex/ieeetran/
14%% http://www.ctan.org/tex-archive/macros/latex/contrib/IEEEtran/
15%% and
16%% http://www.ieee.org/
17
18
19
20% *** Authors should verify (and, if needed, correct) their LaTeX system ***
21% *** with the testflow diagnostic prior to trusting their LaTeX platform ***
22% *** with production work. IEEE's font choices can trigger bugs that do ***
23% *** not appear when using other class files. ***
24% The testflow support page is at:
25% http://www.michaelshell.org/tex/testflow/
26
27
28%%*************************************************************************
29%% Legal Notice:
30%% This code is offered as-is without any warranty either expressed or
31%% implied; without even the implied warranty of MERCHANTABILITY or
32%% FITNESS FOR A PARTICULAR PURPOSE!
33%% User assumes all risk.
34%% In no event shall IEEE or any contributor to this code be liable for
35%% any damages or losses, including, but not limited to, incidental,
36%% consequential, or any other damages, resulting from the use or misuse
37%% of any information contained here.
38%%
39%% All comments are the opinions of their respective authors and are not
40%% necessarily endorsed by the IEEE.
41%%
42%% This work is distributed under the LaTeX Project Public License (LPPL)
43%% ( http://www.latex-project.org/ ) version 1.3, and may be freely used,
44%% distributed and modified. A copy of the LPPL, version 1.3, is included
45%% in the base LaTeX documentation of all distributions of LaTeX released
46%% 2003/12/01 or later.
47%% Retain all contribution notices and credits.
48%% ** Modified files should be clearly indicated as such, including **
49%% ** renaming them and changing author support contact information. **
50%%
51%% File list of work: IEEEtran.cls, IEEEtran_HOWTO.pdf, bare_adv.tex,
52%% bare_conf.tex, bare_jrnl.tex, bare_jrnl_compsoc.tex
53%%*************************************************************************
54
55% Note that the a4paper option is mainly intended so that authors in
56% countries using A4 can easily print to A4 and see how their papers will
57% look in print - the typesetting of the document will not typically be
58% affected with changes in paper size (but the bottom and side margins will).
59% Use the testflow package mentioned above to verify correct handling of
60% both paper sizes by the user's LaTeX system.
61%
62% Also note that the "draftcls" or "draftclsnofoot", not "draft", option
63% should be used if it is desired that the figures are to be displayed in
64% draft mode.
65%
66\documentclass[journal]{journal}
67%
68% If IEEEtran.cls has not been installed into the LaTeX system files,
69% manually specify the path to it like:
70% \documentclass[journal]{../sty/IEEEtran}
71
72
73
74
75
76% Some very useful LaTeX packages include:
77% (uncomment the ones you want to load)
78
79
80% *** MISC UTILITY PACKAGES ***
81%
82%\usepackage{ifpdf}
83% Heiko Oberdiek's ifpdf.sty is very useful if you need conditional
84% compilation based on whether the output is pdf or dvi.
85% usage:
86% \ifpdf
87% % pdf code
88% \else
89% % dvi code
90% \fi
91% The latest version of ifpdf.sty can be obtained from:
92% http://www.ctan.org/tex-archive/macros/latex/contrib/oberdiek/
93% Also, note that IEEEtran.cls V1.7 and later provides a builtin
94% \ifCLASSINFOpdf conditional that works the same way.
95% When switching from latex to pdflatex and vice-versa, the compiler may
96% have to be run twice to clear warning/error messages.
97
98
99
100
101
102
103% *** CITATION PACKAGES ***
104%
105%\usepackage{cite}
106% cite.sty was written by Donald Arseneau
107% V1.6 and later of IEEEtran pre-defines the format of the cite.sty package
108% \cite{} output to follow that of IEEE. Loading the cite package will
109% result in citation numbers being automatically sorted and properly
110% "compressed/ranged". e.g., [1], [9], [2], [7], [5], [6] without using
111% cite.sty will become [1], [2], [5]--[7], [9] using cite.sty. cite.sty's
112% \cite will automatically add leading space, if needed. Use cite.sty's
113% noadjust option (cite.sty V3.8 and later) if you want to turn this off.
114% cite.sty is already installed on most LaTeX systems. Be sure and use
115% version 4.0 (2003-05-27) and later if using hyperref.sty. cite.sty does
116% not currently provide for hyperlinked citations.
117% The latest version can be obtained at:
118% http://www.ctan.org/tex-archive/macros/latex/contrib/cite/
119% The documentation is contained in the cite.sty file itself.
120
121
122
123
124
125
126% *** GRAPHICS RELATED PACKAGES ***
127%
128\ifCLASSINFOpdf
129 % \usepackage[pdftex]{graphicx}
130 % declare the path(s) where your graphic files are
131 % \graphicspath{{../pdf/}{../jpeg/}}
132 % and their extensions so you won't have to specify these with
133 % every instance of \includegraphics
134 % \DeclareGraphicsExtensions{.pdf,.jpeg,.png}
135\else
136 % or other class option (dvipsone, dvipdf, if not using dvips). graphicx
137 % will default to the driver specified in the system graphics.cfg if no
138 % driver is specified.
139 % \usepackage[dvips]{graphicx}
140 % declare the path(s) where your graphic files are
141 % \graphicspath{{../eps/}}
142 % and their extensions so you won't have to specify these with
143 % every instance of \includegraphics
144 % \DeclareGraphicsExtensions{.eps}
145\fi
146% graphicx was written by David Carlisle and Sebastian Rahtz. It is
147% required if you want graphics, photos, etc. graphicx.sty is already
148% installed on most LaTeX systems. The latest version and documentation can
149% be obtained at:
150% http://www.ctan.org/tex-archive/macros/latex/required/graphics/
151% Another good source of documentation is "Using Imported Graphics in
152% LaTeX2e" by Keith Reckdahl which can be found as epslatex.ps or
153% epslatex.pdf at: http://www.ctan.org/tex-archive/info/
154%
155% latex, and pdflatex in dvi mode, support graphics in encapsulated
156% postscript (.eps) format. pdflatex in pdf mode supports graphics
157% in .pdf, .jpeg, .png and .mps (metapost) formats. Users should ensure
158% that all non-photo figures use a vector format (.eps, .pdf, .mps) and
159% not a bitmapped formats (.jpeg, .png). IEEE frowns on bitmapped formats
160% which can result in "jaggedy"/blurry rendering of lines and letters as
161% well as large increases in file sizes.
162%
163% You can find documentation about the pdfTeX application at:
164% http://www.tug.org/applications/pdftex
165
166
167
168
169
170% *** MATH PACKAGES ***
171%
172\usepackage[cmex10]{amsmath}
173% A popular package from the American Mathematical Society that provides
174% many useful and powerful commands for dealing with mathematics. If using
175% it, be sure to load this package with the cmex10 option to ensure that
176% only type 1 fonts will be utilized at all point sizes. Without this option,
177% it is possible that some math symbols, particularly those within
178% footnotes, will be rendered in bitmap form which will result in a
179% document that can not be IEEE Xplore compliant!
180%
181% Also, note that the amsmath package sets \interdisplaylinepenalty to 10000
182% thus preventing page breaks from occurring within multiline equations. Use:
183%\interdisplaylinepenalty=2500
184% after loading amsmath to restore such page breaks as IEEEtran.cls normally
185% does. amsmath.sty is already installed on most LaTeX systems. The latest
186% version and documentation can be obtained at:
187% http://www.ctan.org/tex-archive/macros/latex/required/amslatex/math/
188
189
190
191
192
193% *** SPECIALIZED LIST PACKAGES ***
194%
195%\usepackage{algorithmic}
196% algorithmic.sty was written by Peter Williams and Rogerio Brito.
197% This package provides an algorithmic environment for describing algorithms.
198% You can use the algorithmic environment in-text or within a figure
199% environment to provide for a floating algorithm. Do NOT use the algorithm
200% floating environment provided by algorithm.sty (by the same authors) or
201% algorithm2e.sty (by Christophe Fiorio) as IEEE does not use dedicated
202% algorithm float types and packages that provide these will not provide
203% correct IEEE style captions. The latest version and documentation of
204% algorithmic.sty can be obtained at:
205% http://www.ctan.org/tex-archive/macros/latex/contrib/algorithms/
206% There is also a support site at:
207% http://algorithms.berlios.de/index.html
208% Also of interest may be the (relatively newer and more customizable)
209% algorithmicx.sty package by Szasz Janos:
210% http://www.ctan.org/tex-archive/macros/latex/contrib/algorithmicx/
211
212
213
214
215% *** ALIGNMENT PACKAGES ***
216%
217%\usepackage{array}
218% Frank Mittelbach's and David Carlisle's array.sty patches and improves
219% the standard LaTeX2e array and tabular environments to provide better
220% appearance and additional user controls. As the default LaTeX2e table
221% generation code is lacking to the point of almost being broken with
222% respect to the quality of the end results, all users are strongly
223% advised to use an enhanced (at the very least that provided by array.sty)
224% set of table tools. array.sty is already installed on most systems. The
225% latest version and documentation can be obtained at:
226% http://www.ctan.org/tex-archive/macros/latex/required/tools/
227
228
229%\usepackage{mdwmath}
230%\usepackage{mdwtab}
231% Also highly recommended is Mark Wooding's extremely powerful MDW tools,
232% especially mdwmath.sty and mdwtab.sty which are used to format equations
233% and tables, respectively. The MDWtools set is already installed on most
234% LaTeX systems. The latest version and documentation is available at:
235% http://www.ctan.org/tex-archive/macros/latex/contrib/mdwtools/
236
237
238% IEEEtran contains the IEEEeqnarray family of commands that can be used to
239% generate multiline equations as well as matrices, tables, etc., of high
240% quality.
241
242
243%\usepackage{eqparbox}
244% Also of notable interest is Scott Pakin's eqparbox package for creating
245% (automatically sized) equal width boxes - aka "natural width parboxes".
246% Available at:
247% http://www.ctan.org/tex-archive/macros/latex/contrib/eqparbox/
248
249
250
251
252
253% *** SUBFIGURE PACKAGES ***
254%\usepackage[tight,footnotesize]{subfigure}
255% subfigure.sty was written by Steven Douglas Cochran. This package makes it
256% easy to put subfigures in your figures. e.g., "Figure 1a and 1b". For IEEE
257% work, it is a good idea to load it with the tight package option to reduce
258% the amount of white space around the subfigures. subfigure.sty is already
259% installed on most LaTeX systems. The latest version and documentation can
260% be obtained at:
261% http://www.ctan.org/tex-archive/obsolete/macros/latex/contrib/subfigure/
262% subfigure.sty has been superseded by subfig.sty.
263
264
265
266%\usepackage[caption=false]{caption}
267%\usepackage[font=footnotesize]{subfig}
268% subfig.sty, also written by Steven Douglas Cochran, is the modern
269% replacement for subfigure.sty. However, subfig.sty requires and
270% automatically loads Axel Sommerfeldt's caption.sty which will override
271% IEEEtran.cls handling of captions and this will result in nonIEEE style
272% figure/table captions. To prevent this problem, be sure and preload
273% caption.sty with its "caption=false" package option. This will preserve
274% IEEEtran.cls handling of captions. Version 1.3 (2005/06/28) and later
275% (recommended due to many improvements over 1.2) of subfig.sty supports
276% the caption=false option directly:
277%\usepackage[caption=false,font=footnotesize]{subfig}
278%
279% The latest version and documentation can be obtained at:
280% http://www.ctan.org/tex-archive/macros/latex/contrib/subfig/
281% The latest version and documentation of caption.sty can be obtained at:
282% http://www.ctan.org/tex-archive/macros/latex/contrib/caption/
283
284
285
286
287% *** FLOAT PACKAGES ***
288%
289%\usepackage{fixltx2e}
290% fixltx2e, the successor to the earlier fix2col.sty, was written by
291% Frank Mittelbach and David Carlisle. This package corrects a few problems
292% in the LaTeX2e kernel, the most notable of which is that in current
293% LaTeX2e releases, the ordering of single and double column floats is not
294% guaranteed to be preserved. Thus, an unpatched LaTeX2e can allow a
295% single column figure to be placed prior to an earlier double column
296% figure. The latest version and documentation can be found at:
297% http://www.ctan.org/tex-archive/macros/latex/base/
298
299
300
301%\usepackage{stfloats}
302% stfloats.sty was written by Sigitas Tolusis. This package gives LaTeX2e
303% the ability to do double column floats at the bottom of the page as well
304% as the top. (e.g., "\begin{figure*}[!b]" is not normally possible in
305% LaTeX2e). It also provides a command:
306%\fnbelowfloat
307% to enable the placement of footnotes below bottom floats (the standard
308% LaTeX2e kernel puts them above bottom floats). This is an invasive package
309% which rewrites many portions of the LaTeX2e float routines. It may not work
310% with other packages that modify the LaTeX2e float routines. The latest
311% version and documentation can be obtained at:
312% http://www.ctan.org/tex-archive/macros/latex/contrib/sttools/
313% Documentation is contained in the stfloats.sty comments as well as in the
314% presfull.pdf file. Do not use the stfloats baselinefloat ability as IEEE
315% does not allow \baselineskip to stretch. Authors submitting work to the
316% IEEE should note that IEEE rarely uses double column equations and
317% that authors should try to avoid such use. Do not be tempted to use the
318% cuted.sty or midfloat.sty packages (also by Sigitas Tolusis) as IEEE does
319% not format its papers in such ways.
320
321
322%\ifCLASSOPTIONcaptionsoff
323% \usepackage[nomarkers]{endfloat}
324% \let\MYoriglatexcaption\caption
325% \renewcommand{\caption}[2][\relax]{\MYoriglatexcaption[#2]{#2}}
326%\fi
327% endfloat.sty was written by James Darrell McCauley and Jeff Goldberg.
328% This package may be useful when used in conjunction with IEEEtran.cls'
329% captionsoff option. Some IEEE journals/societies require that submissions
330% have lists of figures/tables at the end of the paper and that
331% figures/tables without any captions are placed on a page by themselves at
332% the end of the document. If needed, the draftcls IEEEtran class option or
333% \CLASSINPUTbaselinestretch interface can be used to increase the line
334% spacing as well. Be sure and use the nomarkers option of endfloat to
335% prevent endfloat from "marking" where the figures would have been placed
336% in the text. The two hack lines of code above are a slight modification of
337% that suggested in the endfloat docs (section 8.3.1) to ensure that
338% the full captions always appear in the list of figures/tables - even if
339% the user used the short optional argument of \caption[]{}.
340% IEEE papers do not typically make use of \caption[]'s optional argument,
341% so this should not be an issue. A similar trick can be used to disable
342% captions of packages such as subfig.sty that lack options to turn off
343% the subcaptions:
344% For subfig.sty:
345% \let\MYorigsubfloat\subfloat
346% \renewcommand{\subfloat}[2][\relax]{\MYorigsubfloat[]{#2}}
347% For subfigure.sty:
348% \let\MYorigsubfigure\subfigure
349% \renewcommand{\subfigure}[2][\relax]{\MYorigsubfigure[]{#2}}
350% However, the above trick will not work if both optional arguments of
351% the \subfloat/subfig command are used. Furthermore, there needs to be a
352% description of each subfigure *somewhere* and endfloat does not add
353% subfigure captions to its list of figures. Thus, the best approach is to
354% avoid the use of subfigure captions (many IEEE journals avoid them anyway)
355% and instead reference/explain all the subfigures within the main caption.
356% The latest version of endfloat.sty and its documentation can be obtained at:
357% http://www.ctan.org/tex-archive/macros/latex/contrib/endfloat/
358%
359% The IEEEtran \ifCLASSOPTIONcaptionsoff conditional can also be used
360% later in the document, say, to conditionally put the References on a
361% page by themselves.
362
363
364
365
366
367% *** PDF, URL AND HYPERLINK PACKAGES ***
368%
369%\usepackage{url}
370% url.sty was written by Donald Arseneau. It provides better support for
371% handling and breaking URLs. url.sty is already installed on most LaTeX
372% systems. The latest version can be obtained at:
373% http://www.ctan.org/tex-archive/macros/latex/contrib/misc/
374% Read the url.sty source comments for usage information. Basically,
375% \url{my_url_here}.
376
377
378
379
380
381% *** Do not adjust lengths that control margins, column widths, etc. ***
382% *** Do not use packages that alter fonts (such as pslatex). ***
383% There should be no need to do such things with IEEEtran.cls V1.6 and later.
384% (Unless specifically asked to do so by the journal or conference you plan
385% to submit to, of course. )
386
387
388% correct bad hyphenation here
389\hyphenation{op-tical net-works semi-conduc-tor}
390
391\pagestyle{empty}
392\usepackage{amsmath}
393\usepackage{amsfonts}
394\usepackage{amssymb}
395\usepackage{graphicx}
396\usepackage{ulem}
397\usepackage{cancel}
398\usepackage{fullpage}
399\usepackage{hyperref}
400\usepackage{caption}
401\usepackage{subcaption}
402\usepackage{mathrsfs}
403\usepackage{color}
404\usepackage{float}
405\begin{document}
406\title{Using Convolutional Neural Networks to Distinguish Different Sign Language Alphanumerics}
407
408\author{Stephen L. Green, Ivan Y. Tyukin, Alexander N. Gorban% <-this % stops a space
409\thanks{Stephen L. Green is with the University of Leicester, University Road, Leicester, Leicestershire (phone: 07760889686; e-mail: slg46@le.ac.uk).}
410\thanks{Ivan Y. Tyukin is with the University of Leicester, University Road, Leicester, Leicestershire (phone: 01162525106; e-mail: it37@le.ac.uk).}
411\thanks{Alexander N. Gorban is with the University of Leicester, University Road, Leicester, Leicestershire (phone: 01162231433; e-mail: ag153@leicester.ac.uk).}}
412
413% The paper headers
414\markboth{Journal of \LaTeX\ Class Files,~Vol.~6, No.~1, January~2007}%
415{Shell \MakeLowercase{\textit{et al.}}: Bare Demo of IEEEtran.cls for Journals}
416
417\maketitle
418\thispagestyle{empty}
419
420\begin{abstract}
421%\boldmath
422Within the past decade, using Convolutional Neural Networks (CNNs) to create Deep Learning systems capable of translating Sign Language into text has been a breakthrough in breaking the communication barrier for deaf-mute people. Conventional research on this subject has been concerned with training the network to recognize the fingerspelling gestures of a given language and produce their corresponding alphanumerics.\\
423One of the problems with the current developing technology is that images are scarce, with little variation in the gestures being presented to the recognition program, and are often skewed towards single skin tones and hand sizes, which makes a percentage of the population’s fingerspelling harder to detect. Along with this, current gesture detection programs are only trained on one fingerspelling language despite there being over two hundred known variants so far. All of this presents a limitation for the traditional exploitation of current state-of-the-art technologies such as CNNs, due to their large number of required parameters.\\
424This work presents a technology that aims to resolve this issue by combining a pretrained legacy AI system for a generic object recognition task with a corrector method to uptrain the legacy network. This is a computationally efficient procedure that does not require large volumes of data even when covering a broad range of sign languages such as American Sign Language, British Sign Language and Chinese Sign Language (Pinyin). \\
425Implementing recent results on measure concentration, namely the stochastic separation theorem, an AI system is regarded as an operator mapping an input from the set of images $u \in U$ to an output in the set of predicted class labels $q \in Q$, where $q$ identifies both the alphanumeric and the language it comes from. These inputs and outputs, along with the internal variables $z \in Z$, represent the system's current state, which implies a mapping that assigns an element $x \in \mathbb{R}^n$ to the triple ($u$, $z$, $q$). As all $x_i$ are i.i.d.\ vectors drawn from a product measure distribution, over a period of time the AI generates a large set of measurements $x_i$ that are grouped into two categories: the correct predictions $M$ and the incorrect predictions $Y$.\\
426Once the network has made its predictions, a corrector can then be applied through centering $S$ and $Y$ by subtracting their means. The data is then regularized by applying the Kaiser rule to the resulting eigenmatrix and then whitened before being split into pairwise, positively correlated clusters. Each of these clusters produces a unique hyperplane and if any element $x$ falls outside the region bounded by these hyperplanes then it is reported as an error.\\
427As a result of this methodology, a self-correcting recognition process is created that can identify fingerspelling from a variety of sign languages and successfully identify the corresponding alphanumeric and the language the gesture originates from, which no other neural network has been able to replicate.
428\end{abstract}
429\begin{IEEEkeywords}
430Convolutional Neural Networks, Sign Language, Deep Learning, Shallow Correctors.
431\end{IEEEkeywords}
432\IEEEpeerreviewmaketitle
433\section{Introduction}
434\IEEEPARstart{S}{ign} language classification using neural networks has been a goal of data scientists since the start of the decade. Many papers exist on the subject and, using Convolutional Neural Networks, each gesture can be assigned a class for the network to predict, making them behave like most other iterations of CNNs. The direction that Sign Language recognition has focused on prioritises the correct identification of singular letters and numbers in an assigned language over whole words. This process, called fingerspelling, is very appealing to classify as there are only the 26 letters of the alphabet and the ten numerical digits to categorise, rather than the thousands of words that exist in any given sign language dictionary.\\
435The implications of a technology that could read sign language and convert the results into text would be a huge leap forward for communication of the deaf and hard of hearing. A study conducted by the World Health Organisation \cite{IEEEhowto:kopka} states that about 466 million people in the world suffer from hearing loss (with 34 million of these people being children) and 70 million of these people are adept in Sign Language. This number is split between over two hundred known variants of Sign Language practiced all over the world \cite{IEEEhowto:clarion}. One of the difficulties of Sign Language detection compared with other types of gesture recognition is the similarity between poses. Changes that would otherwise be ignored in full body detection such as the placement of a thumb or the angle the hand makes with the wrist can change the meaning of the gesture being conveyed. This is why the majority of papers focus on specialising in the alphanumerics of one sign language.\\
436With this paper, the gesture sets for American Sign Language (practised by about half a million people in the United States)~\cite{IEEEhowto:asl}, British Sign Language (practised by about one hundred and fifty thousand people in the United Kingdom)~\cite{IEEEhowto:bsl} and Chinese (Pinyin) Sign Language are fed into the Inception neural network and an algorithm is created that can recognise all three languages. An error corrector is then appended to the results so any misclassification produced by Inception can be detected and amended before final output. This combination provides a neural network that is able to read multiple sign languages and is both fast and accurate.
437\section{Previous Literature}
438%\begin{figure}[h]
439%\centering
440%\begin{subfigure}{.25\textwidth}
441% \centering
442% \includegraphics[width=.8\linewidth]{Gloves.png}
443% \caption{Motion Gloves \cite{IEEEhowto:gloveexample}}
444% \label{fig:sub1}
445%\end{subfigure}%
446%\begin{subfigure}{.25\textwidth}
447% \centering
448% \includegraphics[width=.97\linewidth]{Kinect.png}
449% \caption{Microsoft Kinect \cite{IEEEhowto:kinectexample}}
450% \label{fig:sub2}
451%\end{subfigure}
452%\caption{The most common examples of gesture tracking in current literature, both glove based and vision based.}
453%\label{fig:test}
454%\end{figure}
455The two most popular categories of gesture recognition are glove based systems and vision based systems. Glove based methods \cite{IEEEhowto:glove} have the advantage of being able to record every slight motion that the user makes, which provides the specifics of the flex motion the hand is making along its trajectory, overcoming the problem of movement based gestures that exist especially outside of sign language. They are also able to provide very accurate results, with a test of 5113 unique words in Chinese Sign Language giving an average accuracy of 91.9\% on this large vocabulary \cite{IEEEhowto:glove2}. The problem is that this method is cumbersome. Motion detection gloves are currently very expensive and are not recommended for everyday interactions with the hard of hearing. There is also a link that needs to be maintained between the gloves and the computer for the gestures to be captured effectively. There are examples of this being done wirelessly but this connection still needs to be maintained and is therefore impractical for the kind of simultaneous translation this project requires.\\
456Vision based systems often require devices such as the Microsoft Kinect, which is able to track the positions of hand gestures relative to the human body with similar accuracy rates to gesture capture using gloves~\cite{IEEEhowto:kinect}. Both of these methods however are impractical for this project so instead all gestures in the dataset were filmed with a Samsung Galaxy S8 camera. The video files were then split into images and this is what has made up the dataset.\\
457With one notable exception \cite{IEEEhowto:MultiSign}, the idea of a multi-language detection system has not been mentioned in the existing literature. The most likely reason is that current research dedicated to just one widely used language is already computationally expensive. Even the most basic alphabet fingerspelling recognition networks require large datasets to train on to receive good results and these sets only exist for a handful of known sign languages \cite{IEEEhowto:Multiplesets}.
458The appended error correction program is a continuation of our previous work \cite{IEEEhowto:OurPaper} where Inception is run on the American Sign Language signs for 0--9. The result of the initial experiments was a correct classification rate of 82.4\% on 10000 tested images. After the images were split into a training and testing set with a 4:1 ratio, the corrector was applied and was able to successfully remove misclassifications from the test set while falsely removing very few True Positive results. The full algorithm is explained in Appendix A.
459\section{Implementation}
460\subsection{Pre-processing}
461For these experiments, Inception Version 3 was trained on 896,000 RGB images covering 112 gestures filmed with multiple participants of varying age, gender and skin colour, with 8000 images for each respective gesture. Each image was taken with a Samsung Galaxy S8's front 8 megapixel camera with resolution 1920x1080, and was later resized to 299x299 when fed through Inception. Each image set represents the gesture taken at various angles and distances covering as many variations as possible where the camera is in front of the gesture at some angle. To mitigate the problem of background noise, each participant's gestures were taken in different locations so the only common element within each group was the gesture itself.\\ Conventional tracking software often requires the image to be within a certain distance from the camera for the sign to be detected, while this method provides a plethora of images that can help detection regardless of which direction the camera is located when pointed at the sign. While the majority of static detection algorithms omit gestures that require movement (in this experiment, ASL J, ASL Z, BSL H and BSL J usually require motion), this work instead focuses on still images that are unique to the process of making the gesture and can be used to distinguish them from the rest of the set without the loss of an alphanumeric.
462\subsection{Architecture}
463The Inception model is a computationally efficient Convolutional Neural Network originally developed by Google in December 2015 to provide state of the art performance rates on the ImageNet Large Scale Visual Recognition Challenge \cite{IEEEhowto:InceptionExplain}. Unlike similar learning algorithms like VGGNet and AlexNet which use deep, wide networks to obtain high performance rates at the cost of long computation times which made these procedures much more impractical for computers with small processing power and lack of access to high-end GPUs, Inception utilises repeated modules to replicate these models' success at a quicker rate.\\
464%\begin{figure}[h!]
465% \centering
466% \includegraphics[width=\linewidth]{InceptionFramework.png}
467% \caption{Inception framework}
468%\end{figure}
469\\
470The model starts with a 299x299x3 input corresponding to a 3 colour (RGB) channel square image with a size of 299 pixels that is processed through stacked Inception modules with five regular convolution layers along with a max pooling function and a softmax output that produces the predicted label for the input \cite{IEEEhowto:Inception}. Each module processes the correlation statistics of the layer before it and groups the clusters of units into filter banks \cite{IEEEhowto:InceptionFilter}. This results in a large percentage of units concentrated in single regions which can be covered with a 1x1 convolution preserving the original dimensions of the structure with minimal loss of information while other structures are fed through 3x3 or 5x5 filters depending on how spread out the clusters are (the operations through these larger filters will increase as Inception progresses once the spatial concentration decreases such that dimensionality needs to be reduced to compensate). A fourth pooling path is also included for outliers that have little to no relation to the rest of the data. Small 1x1 convolution filters are attached to the larger layers reducing the number of necessary calculations. These modules stacked in succession create a unique architecture that has 12 times fewer parameters than AlexNet and 36 times fewer parameters than VGGNet \cite{IEEEhowto:Inception}.
471\subsection{Training}
472The top layer of Inception is a softmax layer that takes a 2048-dimensional vector as an input called a bottleneck. With 112 labels, the sum of each of the weights and biases necessary for Inception to be trained comes to 229,488 parameters that are learned in total \cite{IEEEhowto:Bottleneck}.\\
473Inception is then trained on the dataset of 896,000 images with a learning rate of 0.01 and 50,000 training steps in total with a training batch size of 100 images. In total, 89,600 images are used for testing, 89,600 images are used for validation and 716,800 images are used for training. Once these steps have been carried out, 448,000 images are given to the retrained Inception algorithm. For each image, the softmax layer takes the bottlenecks and produces a probability for each label that corresponds to the likelihood of that label correctly matching the given image according to what Inception has been trained on. The label with the highest respective probability is chosen as the final prediction made by Inception.
474\subsection{Post-processing}
Once each image has a predicted label attached to it, a corrector can be appended to the current system that is able to detect incorrect labels and reassess them~\cite{IEEEhowto:Corrector}. Each element is first sorted into either the set $\mathcal{M}$, which contains each element with a correct label attached, or the set $\mathcal{Y}$, which contains every element with an incorrect label attached, along with their union $\mathcal{S} = \mathcal{M} \cup \mathcal{Y}$. The sets $\mathcal{M}^1=\mathcal{M}\text{, }\mathcal{Y}^1=\mathcal{Y}\text{ \& }\mathcal{S}^1=\mathcal{S}$ are then initialised, the number of clusters $p$ is chosen and the filtering threshold $\theta$ is set.
476\subsubsection{Centering}
The data is first centered: the sets $\mathcal{S}^i_c\text{ \& }\mathcal{Y}^i_c$ are created by subtracting the mean $\overline{\mathbf{x}}(\mathcal{S}^i)$ from the elements of $\mathcal{S}^i$ and $\mathcal{Y}^i$ respectively.
478\begin{center}
479$\mathcal{S}^i_c = \{\mathbf{x} \in \mathbb{R}^n | \mathbf{x} = \xi - \overline{\mathbf{x}}(\mathcal{S}^i)\text{, }\xi \in \mathcal{S}^i \}$
480$\mathcal{Y}^i_c = \{\mathbf{x} \in \mathbb{R}^n | \mathbf{x} = \xi - \overline{\mathbf{x}}(\mathcal{S}^i)\text{, }\xi \in \mathcal{Y}^i \}$
481\end{center}
482\subsubsection{Regularization}
The covariance matrix of $\mathcal{S}^i$ is calculated along with the corresponding eigenvalues and eigenvectors. All eigenvectors $h_1\text{, }h_2\text{, }\ldots \text{, }h_m$ whose eigenvalues $\lambda_1\text{, }\lambda_2\text{, }\ldots \text{, }\lambda_m$ pass the Kaiser--Guttman test are combined into a single matrix $H$~\cite{IEEEhowto:Kaiser}, whose transpose is then multiplied by each element in $\mathcal{S}^i_c\text{ \& }\mathcal{Y}^i_c$:
484\begin{center}
$\mathcal{S}^i_r = \{\mathbf{x} \in \mathbb{R}^m | \mathbf{x} = H^T\xi\text{, }\xi \in \mathcal{S}^i_c\}$
$\mathcal{Y}^i_r = \{\mathbf{x} \in \mathbb{R}^m | \mathbf{x} = H^T\xi\text{, }\xi \in \mathcal{Y}^i_c\}$
487\end{center}
488\subsubsection{Whitening}
489The two sets then undergo a whitening coordinate transformation ensuring that the covariance matrix of the transformed data is the identity matrix:
490\begin{center}
491$\mathcal{S}^i_w = \{\mathbf{x} \in \mathbb{R}^m | \mathbf{x} = Cov(\mathcal{S}^i_r)^{-\frac{1}{2}}\xi\text{, }\xi \in \mathcal{S}^i_r\}$
492$\mathcal{Y}^i_w = \{\mathbf{x} \in \mathbb{R}^m | \mathbf{x} = Cov(\mathcal{S}^i_r)^{-\frac{1}{2}}\xi\text{, }\xi \in \mathcal{Y}^i_r\}$
493\end{center}
494\subsubsection{Projection}
Elements of $\mathcal{S}^i_w\text{, }\mathcal{Y}^i_w$ are then projected onto the unit sphere by scaling them to unit length: $\mathbf{x}\mapsto\mathbf{x}/\|\mathbf{x}\|$.
496\subsubsection{Clustering}
The error set $\mathcal{Y}^i_w$ is partitioned into $p$ clusters $\mathcal{Y}^i_{w\text{,}1}\text{, }\mathcal{Y}^i_{w\text{,}2}\text{, }\ldots \text{, }\mathcal{Y}^i_{w\text{,}p}$ with elements that are pairwise positively correlated.
498\subsubsection{Training}
499For each cluster $\mathcal{Y}_{w,i}$, $i=1,\dots,p$ and its complement $\mathcal{S}_w \setminus \mathcal{Y}_{w\text{, }i}$ we construct the following separating hyperplanes:
500\[
501\begin{split}
502h_i(\mathbf{x})=& \ell_i(\mathbf{x})-c_i, \\
503\ell_i(\mathbf{x}) =& \left\langle\frac{\mathbf{w}_i}{\|\mathbf{w}_i\|}\text{, }\mathbf{x}\right\rangle, \ c_i= \min_{\xi \in \mathcal{Y}_{w\text{, }i}}\left\langle\frac{\mathbf{w}_i}{\|\mathbf{w}_i\|}\text{, }\xi\right\rangle\\
504\mathbf{w}_i =& (Cov(\mathcal{S}^i_w \setminus \mathcal{Y}^i_{w\text{,}i}) + Cov(\mathcal{Y}^i_{w\text{,}i}))^{-1}\times\\
&(\overline{\mathbf{x}}(\mathcal{Y}^i_{w\text{,}i}) - \overline{\mathbf{x}}(\mathcal{S}^i_w \setminus \mathcal{Y}^i_{w\text{,}i})).
506\end{split}
507\]
Each hyperplane whose value of $c_j$ is greater than $\theta$ is kept, and a corresponding element $f_j(\mathbf{x})$ is created:
509\begin{center}
$f_j(\mathbf{x}) = f\left(\left\langle \frac{WH^T(\mathbf{x} - \overline{\mathbf{x}}(\mathcal{S}^i))}{\|\mathbf{x}\|}\text{, }\frac{\mathbf{w}_j}{\|\mathbf{w}_j\|} \right\rangle - c_j\right)$
511\end{center}
The set of elements $\mathbf{x} \in \mathcal{S}^i_w \setminus \mathcal{Y}^i_w$ for which $h_j(\mathbf{x}) \geq 0$ is called $\mathcal{C}_j$, and the elements of the set $\mathcal{C}_j \cup \mathcal{Y}^i_{w\text{,}j}$ are projected orthogonally onto the hyperplane $h_j(\mathbf{x}) = \ell_j(\mathbf{x}) - c_j$ as:
513\begin{center}
$\mathbf{x}\mapsto\left(I - \frac{\mathbf{w}_j\mathbf{w}_j^T}{\|\mathbf{w}_j\|^2}\right)\mathbf{x} + \frac{c_j\mathbf{w}_j}{\|\mathbf{w}_j\|} = P(\mathbf{w}_j)\mathbf{x} + b(\mathbf{w}_j\text{, }c_j)$
515\end{center}
This determines a hyperplane $h_{j\text{,}2}(\mathbf{x}) = \langle \mathbf{w}_{j\text{, }2}\text{, }\mathbf{x} \rangle - c_{j\text{, }2}$ whose value is less than 0 for all projections of $\mathcal{C}_j$ and greater than or equal to 0 for all projections of $\mathcal{Y}^i_{w\text{,}j}$. If no such plane exists, the linear Fisher discriminant is used.\\
517A second function is then created followed by an amalgamation that only produces a positive response when the output of the previous two functions is also positive.
518\begin{center}
$f^\perp_j(\mathbf{x}) = f\left(\left\langle P(\mathbf{w}_j)\left(\frac{WH^T(\mathbf{x} - \overline{\mathbf{x}}(\mathcal{S}^i))}{\|\mathbf{x}\|}\right) + b(\mathbf{w}_j\text{, }c_j)\text{, }\mathbf{w}_{j\text{, }2}\right\rangle - c_{j\text{, }2}\right)$
520$f^c_j(\mathbf{x}) = f(\text{Step}(f_j(\mathbf{x})) + \text{Step}(f^\perp_j(\mathbf{x})) - 2)$
521\end{center}
522\subsubsection{Integration}
For any $\mathbf{x}$ that produces a value of $f^c_j(\mathbf{x})$ that is greater than 0, the label is deemed incorrect and can be swapped for the next best matching label that Inception reported for that image.
524\subsubsection{Testing}
525The newly generated sets $\mathcal{M}^{i+1}\text{, }\mathcal{Y}^{i+1}\text{ \& }\mathcal{S}^{i+1}$, the cluster number $p$ and the filtering threshold $\theta$ can be carried over to another iteration of correcting if deemed necessary.
526\section{Results}
527???
528\section{Conclusion And Future Work}
529?????
530\appendices
531% appendices go here
532\ifCLASSOPTIONcaptionsoff
533 \newpage
534\fi
535\begin{thebibliography}{1}
536\bibitem{IEEEhowto:kopka}
537World Health Organization. Deafness and Hearing Loss. Available online: http://www.who.int/mediacentre/factsheets/fs300/en/
538\bibitem{IEEEhowto:clarion}
539Clarion UK. https://www.clarion-uk.com/know-many-sign-languages-world/
540\bibitem{IEEEhowto:asl}
Mitchell, Ross E., et al. ``How many people use ASL in the United States? Why estimates need updating.'' Sign Language Studies 6.3 (2006): 306--335.
542\bibitem{IEEEhowto:bsl}
543British Deaf Association https://bda.org.uk/help-resources/
544\bibitem{IEEEhowto:gloveexample}
545http://www.washington.edu/news/2016/04/12/uw-undergraduate-team-wins-10000-lemelson-mit-student-prize-for-gloves-that-translate-sign-language/
546\bibitem{IEEEhowto:kinectexample}
547https://www.microsoft.com/en-us/research/blog/kinect-sign-language-translator-part-1/
548\bibitem{IEEEhowto:glove}
M. Mohandes, S. Aliyu and M. Deriche, ``Prototype Arabic Sign language recognition using multi-sensor data fusion of two leap motion controllers,'' 2015 IEEE 12th International Multi-Conference on Systems, Signals \& Devices (SSD15), Mahdia, 2015, pp. 1--6.
550\bibitem{IEEEhowto:glove2}
551Fang, Gaolin \& Gao, Wen \& Zhao, Debin. (2007). Large-Vocabulary Continuous Sign Language Recognition Based on Transition-Movement Models. Systems, Man and Cybernetics, Part A: Systems and Humans, IEEE Transactions on. 37. 1 - 9. 10.1109/TSMCA.2006.886347.
552\bibitem{IEEEhowto:kinect}
Yang, Hee-Deok. ``Sign Language Recognition with the Kinect Sensor Based on Conditional Random Fields.'' Sensors (2014).
554\bibitem{IEEEhowto:MultiSign}
555K. Kumar, Vinay \& Goudar, R.H. \& Desai, V.T.. (2015). Sign Language Unification: The Need for Next Generation Deaf Education. Procedia Computer Science. 48. 673-678. 10.1016/j.procs.2015.04.151.
556\bibitem{IEEEhowto:Multiplesets}
557http://facundoq.github.io/unlp/sign\text{\_}language\text{\_}datasets/index.html
558\bibitem{IEEEhowto:OurPaper}
559Fast Construction of Correcting Ensembles for Legacy Artificial Intelligence Systems: Algorithms and a Case Study
560\bibitem{IEEEhowto:InceptionExplain}
561arXiv:1512.00567 [cs.CV]
562\bibitem{IEEEhowto:Inception}
563https://medium.com/initialized-capital/we-need-to-go-deeper-a-practical-guide-to-tensorflow-and-inception-50e66281804f
564\bibitem{IEEEhowto:InceptionFilter}
565arXiv:1409.4842 [cs.CV]
566\bibitem{IEEEhowto:Bottleneck}
567arXiv:1805.06618 [cs.CV]
568\bibitem{IEEEhowto:Corrector}
569Fast Construction of Correcting Ensembles for Legacy
570Artificial Intelligence Systems: Algorithms and a Case Study,
571Ivan Yu. Tyukin, Alexander N. Gorban, Stephen Green, Danil Prokhorov
572\bibitem{IEEEhowto:Kaiser}
573D. Jackson, Stopping Rules in Principal Components Analysis: A Comparison of Heuristical and Statistical Approaches, Ecology 74 (8) (1993) 2204-2214.
574\end{thebibliography}
575\end{document}