% (removed paste-site timestamp artifact: "6 years ago · Oct 23, 2019, 04:08 AM" — not part of the manuscript)
1\documentclass[nonblindrev]{informs3}
%\documentclass[msom,nonblindrev]{informs3}
3%\documentclass[mnsc,blindrev]{informs3}
4%\documentclass[opre,nonblindrev]{informs3} % current default for manuscript submission
5%\documentclass[nonblindrev]{informs3}
6
7
8\OneAndAHalfSpacedXI
9%%\OneAndAHalfSpacedXII % Current default line spacing
10%%\DoubleSpacedXII
11%%\DoubleSpacedXI
12
13% If hyperref is used, dvi-to-ps driver of choice must be declared as
14% an additional option to the \documentclass. For example
15%\documentclass[dvips,mnsc]{informs3} % if dvips is used
16%\documentclass[dvipsone,mnsc]{informs3} % if dvipsone is used, etc.
17
18% Private macros here (check that there is no clash with the style)
19
20
21
22
23
24
25\usepackage{boldline}
26\usepackage[table]{xcolor}
27\usepackage{tabularx,booktabs}
28\usepackage{array}
29\usepackage{endnotes}
30\usepackage{amsmath}
31\usepackage{longtable}
32\usepackage{adjustbox}
33
34\usepackage{multirow}
35\usepackage{framed}
36\usepackage{makecell}
37\usepackage{threeparttable}
38\usepackage{comment}
39\usepackage{bm}
40\usepackage{subfloat}
41%\usepackage[flushleft]{threeparttable}
42\usepackage{subcaption}
43\usepackage{tabularx}
44\usepackage{float}
45\usepackage{rotating}
46\usepackage{caption}
47
48
49
50\let\footnote=\endnote
51\let\enotesize=\normalsize
52\def\notesname{Endnotes}%
53\def\makeenmark{$^{\theenmark}$}
54\def\enoteformat{\rightskip0pt\leftskip0pt\parindent=1.75em
55 \leavevmode\llap{\theenmark.\enskip}}
56\newcommand{\mb}[1]{\mbox{\boldmath $#1$}}
57\newcommand{\mbt}[1]{\mbox{\boldmath $\tilde{#1}$}}
58\newcommand{\mbst}[1]{{\mbox{\boldmath \scriptsize{$\tilde{#1}$}}}}
\newcommand{\E}{{\rm E}}
% NOTE(review): \P, \g and \x are used in the body (Notations paragraph and model (1))
% but were never defined; \P defaults to the pilcrow symbol in LaTeX. Definitions added
% below -- please confirm they match the intended notation.
\renewcommand{\P}{\mathbb{P}}
\newcommand{\g}{\bm{g}}
\newcommand{\x}{\bm{x}}
60%\newcommand{\proof}{{\bf Proof : }}
61%\newcommand{\qed}{\rule{7pt}{7pt}}
62%\newcommand{\remark}{{\bf Remark : }}
63\newcommand{\defi}{\stackrel{\Delta}{=}}
64%\newtheorem{theorem}{Theorem}
65%\newtheorem{lemma}{Lemma}
66%\newtheorem{prop}{Proposition}
67%\newtheorem{coro}{Corollary}
68%\newtheorem{claim}{Claim}
69\newtheorem{Defi}{Definition}
70%\usepackage{ntheorem}
71%\theoremstyple{break}
72\theoremstyle{break}
73\newtheorem{algorithm}{Algorithm}
74
75\def\blot{\quad \mbox{$\vcenter{ \vbox{ \hrule height.4pt
76 \hbox{\vrule width.4pt height.9ex \kern.9ex \vrule width.4pt}
77 \hrule height.4pt}}$}}
78% Natbib setup for author-year style
79\usepackage{natbib}
80 \bibpunct[, ]{(}{)}{,}{a}{}{,}%
81 \def\bibfont{\small}%
82 \def\bibsep{\smallskipamount}%
83 \def\bibhang{24pt}%
84 \def\newblock{\ }%
85 \def\BIBand{and}%
86
87%\usepackage{threeparttable}
88
89%% Setup of theorem styles. Outcomment only one.
90%% Preferred default is the first option.
91\TheoremsNumberedThrough % Preferred (Theorem 1, Lemma 1, Theorem 2)
%\TheoremsNumberedByChapter % (Theorem 1.1, Lemma 1.1, Theorem 1.2)
93\ECRepeatTheorems
94
95%% Setup of the equation numbering system. Outcomment only one.
96%% Preferred default is the first option.
97\EquationsNumberedThrough % Default: (1), (2), ...
98%\EquationsNumberedBySection % (1.1), (1.2), ...
99
100% For new submissions, leave this number blank.
101% For revisions, input the manuscript number assigned by the on-line
102% system along with a suffix ".Rx" where x is the revision number.
103\MANUSCRIPTNO{}
104%(Please, provide the manuscript number!)
105
106%-----------------------------------------------------------------------------
107% To set spacing:
108\def\spacingset#1{\renewcommand{\baselinestretch}%
109 {#1}\small\normalsize}
110\newcommand{\resetspacing}{\spacingset{1.70}}
111\newcommand{\unitspacing}{\spacingset{1.0}}
112\newcommand{\tightspacing}{\spacingset{1.25}}
113%-----------------------------------------------------------------------------
114
115
116\begin{document}
117\RUNAUTHOR{ }
118
119\RUNTITLE{Redundancy Optimization with Side Information}
120
121\TITLE{Redundancy Optimization with Side Information: State-Dependent Distributionally Robust Models}
122%\author{Shuming Wang}
123% use optional labels to link authors explicitly to addresses:
124
125\ARTICLEAUTHORS{%
126}
127
128
129%\author{}
130%\affil[1]{School of Economics and Management, University of Chinese Academy of Sciences, China}
131%\affil[2]{University of Chinese Academy of Sciences, China}
132%% \address[label2]{}
\ABSTRACT{{\color{red}
In this paper, we present a state-dependent, distributionally robust framework for solving a series-parallel, multi-type-component, mixed-redundancy-strategy system with uncertain component lifetimes. We assume that the distribution of the component lifetimes is not exactly known; instead, only partial distributional information (such as the mean and dispersion) can be extracted from the data set. We aim to produce a system design that is reliable enough even under the worst possible distribution, given the partial information extracted. Moreover, we introduce a clustering process in extracting the information, which also includes dimension-reduction and side-information techniques. We extract distributional information from each cluster, instead of from the entire data set. This helps us incorporate more information into the ambiguity set, which results in a smaller ambiguity set and therefore better solutions. Although the model itself is highly nonlinear, we utilize a linearization technique to transform the model into a mixed integer linear program, without adding integer decision variables other than the original ones. This makes solving the problem tractable and easy to implement with off-the-shelf libraries. Finally, we present a computational study to support our theoretical results.}
}
136\KEYWORDS {Redundancy optimization, lifetime uncertainty, mixed redundancy strategies, robust optimization, linear programming, clustering}
137
138
139\maketitle
140%\end{frontmatter}
141
142
143\section{Introduction}
144{\color{blue}
Redundancy optimization or redundancy allocation, roughly speaking, is to determine the most cost-efficient allocation of redundancies while keeping the system reliability above a predefined level (Kuo and Wan~2007). It is well known in multiple engineering domains, {\it e.g.}, railway engineering, nuclear engineering and aerospace engineering, to name a few. In a redundancy system, the cold-standby and the active-parallel are two typical and commonly used strategies in redundancy configuration ({\color{red} Ardakan~et~al.~2014}), and lifetime uncertainty of components has always been a crucial modeling issue for computing the system reliability and optimizing redundancy allocations under different redundancy strategies.
146
147
148
149
As a famous optimization problem under uncertainty, redundancy optimization has been extensively studied in the past decades. Different approaches have been proposed to deal with the modeling difficulty of the lifetime uncertainty and the resulting redundancy optimization problems. For instance, the optimization models with deterministic reliability levels of components ({\it e.g.}, Coit~et~al.~1996, Kulturel-Konak~et~al.~2003, Ardakan~et~al.~2014) and with uncertain component reliability levels ({\it e.g.}, Feizollahi and Modarres~2012, Feizollahi~et~al.~2014,~Feizollahi~et~al.~2015); the stochastic programming approaches with known lifetime distributions of components ({\it e.g.}, Coit and Smith~1998, Prasad et al.~2001, Zhao and Liu~2003); and most recently, the distributionally robust optimization (DRO) approach with uncertain lifetime distributions (Wang~et~al.~2019). A detailed literature review is provided in Section~\ref{sec:LR}. Several difficulties have been resolved, yet new challenges are constantly emerging.
151
152
On the one hand, all the above mentioned approaches rely solely on utilizing the lifetime information, which might not be sufficient for the increasingly complex systems nowadays. Technology advancements constantly drive engineering systems to evolve toward ever higher complexities with larger numbers of components. For instance, the recent autonomous cars involve multiple sensors, control units, communication devices, safety barriers, artificial intelligence (AI) components, etc., which render them much more complex than the conventional electromechanical style of cars. Another example is the high-speed train, where new technologies, such as the automatic train operation (ATO) system, 5G telecommunication devices and auxiliary braking systems, are being integrated into the train system for more efficient operations. New technologies bring great upgrades to the existing systems, which however also create new sources of failures. The complexity of the system could largely complicate the uncertainty in the occurrence of the time-to-failures or lifetimes, and there is a call for more related useful side information (in addition to lifetime data), {\it e.g.}, working condition (frequency and intensity), seasonal information (temperature and weather), quality information of the components, to be incorporated for estimating more effectively the component lifetime characteristics for redundancy optimization.
154
155
156On the other hand, the above mentioned side information is becoming more and more available. Thanks to
the pervasive sensing capabilities of the new generation of industrial IoT systems, it is often possible to have the sensors record multiple types of data (even real-time data streams) on the working environment of components.
For instance, there are thousands of sensors installed in one electric multiple unit (EMU) train, such as temperature sensors, humidity sensors, accelerometers, {\it etc}. The sensors transmit their readings at a very high frequency, {\it e.g.}, 100~Hz, to the onboard database system in the train, and the train will upload all stored data to the station once it completes the daily operation. Among all these data collected, some can be importantly relevant to particular failure events. For example, the vibration signal has a strong correlation with the degradation condition or the abnormal condition of a running wheel of a train. Therefore,
159such side information can improve the description to the failures as well as to the lifetime distributions, and should be incorporated into the model together with the lifetime data itself to enhance the decision quality of the redundancy allocation under lifetime uncertainty.
160
161
In this paper, we consider a redundancy optimization problem of a series-parallel system with a mixed redundancy strategy of active-parallels and cold-standbys, where the distributions of component lifetimes are assumed to be uncertain. We develop a new optimization framework, based on distributionally robust optimization with conditional states, that is able to effectively incorporate the side information on component lifetimes, via advanced machine learning techniques, {\it e.g.}, clustering, regression trees and PCA, to realize a more effective decision making of redundancy allocation under distributional uncertainty of lifetimes. Among all the above mentioned related studies, our paper is most relevant to Wang~et~al.~(2019), yet with key extensions in
163several directions. We briefly summarize our major contributions as follows:
164\begin{itemize}
\item We develop a new distributionally robust redundancy optimization framework with multiple conditional states, which is able to flexibly incorporate the side information related to lifetimes. To the best of our knowledge, our study is the first redundancy optimization approach with distributional uncertainty that is equipped with the machinery for harnessing side information via advanced machine learning techniques. In addition, the parameter selection of our framework can be readily realized by a cross-validation approach that we develop.
\item From the modeling perspective, our present study naturally generalizes Wang~et~al.~(2019) in two respects: (i) The distributionally robust redundancy model of Wang~et~al.~(2019) can be treated as a special case of our current model with a single state. (ii) Our current model considers a set of general conic constraints to capture the possible marginal and/or cross correlations of the lifetimes of different types of components, which is also a generalization of the absolute dispersion constraints in the ambiguity set considered in Wang~et~al.~(2019).
167\item Methodologically, our developed distributionally robust redundancy optimization model, to our best knowledge, is also the first distributionally robust chance-constrained optimization model over the state-dependent ambiguity set with general conic distributional constraints, which can also be readily extended to the general chance-constrained optimization problems.
\item Computationally, we show that the worst-case system reliability level over the state-dependent ambiguity set given a redundancy design, in several common cases, can be efficiently computed by solving a tractable conic program ({\it e.g.}, LP or SOCP), while the resulting distributionally robust redundancy optimization model reduces to a mixed integer conic program ({\it e.g.}, MILP or MISOCP). Furthermore, we develop a computationally viable supergradient-based decomposition algorithm to further enhance the scalability of the resulting MIP problems.
169\end{itemize}
170
171
172
The remainder of the paper is organized as follows. Section~\ref{sec:LR} reviews the related studies on redundancy optimization. Section~\ref{sec:base} introduces the base problem formulation of redundancy optimization that we are studying. In Section~\ref{sec:DRRM}, we introduce our distributionally robust redundancy optimization model with side information, and discuss the tractable reformulations of the developed model as well as the hyperparameter selection via cross validation. In Section~\ref{sec:algo}, we discuss the development of the supergradient-based decomposition algorithm. Finally, we present numerical experiments and a case study in Section~\ref{sec:CS}, and conclude our study in Section~\ref{sec:conclusion}.
174
175
176}
177
178\paragraph{\bf Notations:}
We use the tilde to denote a random parameter, {\it e.g.,} $\bm{\tilde{z}} \in \mathbb{R}^n$; ``$\mathbb{P}$'' to denote a specific probability distribution; and $\bm{\tilde{z}} \sim \P$ to denote the random variable $\bm{\tilde{z}}$ with probability distribution $\P$.
For a random variable $\bm{\tilde{z}} \in \mathbb{R}^n$ with distribution $\P$ and function $\g:\mathbb{R}^n \mapsto \mathbb{R}^m$, we denote $\E_{\P}(\g(\bm{\tilde{z}}))$ as the expectation of the random variable $\g(\bm{\tilde{z}})$ under the probability measure $\P$. We use $\mathcal{P}\left( \mathbb{R}^{n}\right)$ to represent the collection of all probability distributions of a random variable of dimension $n$, and ``$\mathbb{F}$'' is a set of distributions for modeling the distributional ambiguity.
181
182\section{Literature Review}\label{sec:LR}
183In this section we provide a brief review of approaches to RAP, including RAP with deterministic component reliability, RAP with uncertain component reliability and RAP with uncertain component lifetimes. In addition, we also briefly review the studies on robust optimization that are methodologically related to our work.
184
{\bf RAP with deterministic component reliability.} Early studies on RAP focus on formulations in which components have deterministic reliability. These problems are in general NP-hard (Chern~1992), and efforts have been put into approximate algorithms to make them tractable. Coit~et~al.~(1996) uses a combination of neural network and genetic algorithm to search for the minimum cost design for a series-parallel system, given a minimum reliability constraint. Genetic algorithm is also applied to a system with cold-standby redundancy strategy (Ardakan~et~al.~2014). Liang~et~al.~(2004) applies ant colony optimization to a series-parallel system, in which the failure rates of components when not in use are the same as when in use (i.e., active redundancy). Kulturel-Konak~et~al.~(2003) introduces tabu search to solve a single-type-component series-parallel system, with k-out-of-n subsystems and exponential component failure times.
186
It can be seen that RAP with deterministic component reliability suffers from its inherent theoretical intractability. Researchers have had to develop various heuristic algorithms to mitigate this drawback, and there is currently no algorithm that is superior to the others in all cases. Moreover, the assumption of known deterministic component reliability levels is often impractical due to the lack of data, as discussed in the previous section. As a result, the focus has recently shifted to RAP with uncertain component reliability.
188
189
190{\bf RAP with uncertain component reliability.} Most papers on RAP with uncertainty consider variations of the individual component reliability levels. Bhunia~et~al.~(2010) considered a reliability optimization problem for a series system as a stochastic chance constrained optimization problem with interval-valued reliability levels of individual components; the problem was transformed into an unconstrained integer programming problem with interval coefficients and solved by metaheuristics. Tekiner-Mogulkoc and Coit (2011) discussed an RAP that minimizes the coefficient of variation of the system reliability estimate with respect to a minimum system reliability constraint, in a series-parallel system, and an exact algorithm based on linear integer programming and a heuristic approach based on combined neighborhood search were proposed to solve the problems when component mixing is or is not allowed, respectively. Sun et al.~(2017) considered the uncertain component state performance and state probability in RAP, where the experts' epistemic estimations as the
191uncertainty parameters were modeled to be set-valued, and the resulting model was solved by a local-search-based metaheuristics approach.
192
193
194
195Recently, some studies have addressed component reliability uncertainty in RAP using robust optimization (Bertsimas and Sim~2004), where component reliability levels are allowed to vary within an {\em uncertainty set}, and the resulting worst-case system reliability level is considered in the optimization of the RAP. Feizollahi and Modarres (2012) and Feizollahi~et~al.~(2014) considered active redundancy in series-parallel systems and developed a robust RAP with an interval uncertainty set and a robust RAP with polyhedral budgeted uncertainty set, respectively. In both studies, the structures of the resulting robust RAP problems were well investigated, and problems could be transformed and solved iteratively with a series of MIP instances. Furthermore, Feizollahi~et~al.~(2015) developed a robust cold-standby redundancy allocation model for series-parallel systems with budgeted uncertainty; the problem could also be solved by an MIP-based iterative algorithm.
196
197In addition, other related studies in this vein include multicriteria RAP models (Coit~et~al.~2004, Zaretalab~et~al.~2015, Govindan~et~al.~2017) and system reliability evaluation due to the lack of knowledge or the imprecision of human estimation (Li~et~al.~2014). More related papers can be found in an excellent survey (Kuo and Wan~2007).
198
199
200
201
202
203{\bf RAP with stochastic lifetimes.} In the RAP literature, only a few studies have explicitly considered lifetime distributions. For example, Coit and Smith~(1998) and Prasad et al.~(2001) have proposed the maximization of a percentile life of the system subject to a budget constraint. Zhao and Liu~(2003) developed stochastic programming models for both parallel and standby redundancy optimization problems, which are solved by simulating the component lifetimes from any given known probability distributions and using metaheuristics. In addition, several studies have also performed theoretical analysis on the stochastic comparisons of redundancy allocation for a very limited number of components ({\it e.g.,} two or three) based on the given lifetime distributions (Li and Hu~2008, Zhao~et~al.~2011). All of these studies require the exactly known probability distributions of component lifetimes, which in practice is often difficult to specify or calibrate. In addition, Coit and Smith~(2002) considered uncertain Weibull component lifetime distributions with random-scale parameters in RAP; their model maximizes a lower percentile of the system time-to-failure distribution. The solution also leverages the approach of metaheuristics due to the complicated problem structure.
204
205
206Most of the resulting redundancy optimization models of the above RAP studies, except for the robust optimization models, are difficult to solve, and the proposed solution approaches ({\it e.g.,} metaheuristics), in general, are intractable. Among the related studies in the above two streams, our work is closest to that of Feizollahi and Modarres (2012), Feizollahi~et~al.~(2014), Feizollahi~et~al.~(2015), and Zhao and Liu~(2003). The research gap can be established in the following aspects:
207\begin{itemize}
 \item The stochastic RAP model of Zhao and Liu~(2003) assumes specific probability distributions for the component lifetimes, which, as discussed previously, cannot be easily calibrated in the practice of reliability engineering, while our proposed RAP model allows the lifetime distributions to be uncertain and vary over a set of possible distributions that are characterized by the available (limited) lifetime information. On the other hand, the robust RAP models of Feizollahi and Modarres~(2012), Feizollahi~et~al.~(2014) and Feizollahi~et~al.~(2015) are based on regular robust optimization, which may sacrifice some critical distributional information of lifetimes, while our proposed RAP model is able to incorporate the distributional characteristics of component lifetimes ({\it e.g.,} mean, dispersion, and support information) into the redundancy optimization.
209 \item Our proposed RAP model considers a general setting with different redundancy strategies as well as multiple types of components for redundancy, while Zhao and Liu~(2003), Feizollahi and Modarres (2012) and Feizollahi~et~al.~(2014) only considered active redundancy and a single type of component. On the other hand, Feizollahi~et~al.~(2015) only considered cold-standby redundancy with a single type of component, while the multiple component types would violate the optimization structure utilized by the authors. Furthermore, our model is also able to incorporate starting failures for cold-standbys and common-cause failures for active redundancies, respectively, which have not been attempted in any of these related studies.
210 \item Finally, the resulting redundancy optimization problem of our proposed RAP model is equivalent to an MILP problem, which does not induce any binary variables in addition to the original redundancy allocation variables (binaries), while all the above four RAP models result in either bilinear MIP formulations (Feizollahi and Modarres~2012, Feizollahi~et~al.~2014), a linear MIP formulation with additional auxiliary integers (Feizollahi~et~al.~2015), or a highly intractable problem that was approached by metaheuristics (Zhao and Liu~2003).
211\end{itemize}
212
213
214{\bf Robust optimization.} Our work is based on distributionally robust optimization (Wiesemann et al.~2014), which is different from the abovementioned regular robust optimization (Bertsimas and Sim~2004, Ben-Tal~et~al.~2017): the latter focuses on the uncertainty of the actual values of the parameters, while the former concerns the uncertainty of the probability distributions of the parameters and is able to utilize the distributional information. In particular, our developed RAP model belongs to the class of distributionally robust chance-constrained programs (DR-CCPs), where the probability distributions of the uncertain parameters are allowed to vary within a distributional set or {\it Chebyshev ambiguity set}, and the worst-case chance level (reliability level in our context) is protected ({\it i.e.,} required to be above a threshold). In DR-CCPs, the case of joint chance constraint (our model belongs to this case) in general is much more difficult than the single chance constraint (Pr\'{e}kopa~1998), and several approximation approaches have been proposed, for instance, Bonferroni's inequality (Nemirovski and Shapiro~2006, Bertsimas~et~al.~2017), $\phi$-divergence (Yanikoglu and den Hertog~2013) and conditional value-at-risk (CVaR) (Chen~et~al.~2010, Zymler~et~al.~2012). Most recently, some exact and tractable models have also been developed if the ambiguity set is carefully designed. Hanasusanto~et~al.~(2017) considered a joint constraint of affine functions with uncertainty and an ambiguity set that contains mean and support information, as well as an upper bound of dispersion for the DR-CCP, and they proved that the model is tractable whenever the support set and the dispersion function can be represented via polynomially numerous tractable (linear, conic quadratic and/or semidefinite) constraints. 
In addition, Xie and Ahmed~(2018) considered a power flow optimization problem with two-sided chance constraints over the ambiguity set, with only mean and covariance information, and derived a second-order conic representable set for the feasible set of the distributionally robust two-sided chance constraints. Most recently, Wang~et~al.~(2019) proposed to use a Chebyshev ambiguity set with distributional information of mean, support and dispersion to solve a mixed-redundancy-strategy RAP. Due to the unique structure of the problem ({\it e.g.,} the mixed cold-standby and active-parallel redundancy strategy), the resulting system lifetime function is a joint constraint of nonlinear functions with uncertainty, but the authors managed to reformulate the model into a linear MIP, where the binary variables are the same as the original redundancy allocation variables, and thus the problem becomes tractable.
215
Our work builds on the work of Wang~et~al.~(2019). In particular, we introduce a clustering process on the data and use it to construct a conditional ambiguity set. In this way, more information can be incorporated into the ambiguity set, which can lead to more favorable results. We utilize cross validation to improve the clustering result. Our ambiguity set is also very general and can incorporate any information of the data set that is defined by a convex function. Methodologically, we are also the first to attempt a conditional ambiguity set in distributionally robust chance-constrained optimization, which enriches the modeling opportunities for the ambiguous CCPs.
217
As for our introduction of a clustering algorithm into distributionally robust optimization, it is in some ways similar to the work of Shang~et~al.~(2017), in which support vector clustering is applied as part of a data-driven robust optimization framework. The incorporation of side information into the clustering process is also a well-known technique (Xing~et~al.~2003, Aggarwal~et~al.~2012, Liu~et~al.~2015).
219
220
221
222
\section{\color{blue}Redundancy Allocation: The Base Problem}\label{sec:base}{\color{blue}
In this section, we formally introduce the problem of redundancy allocation. We consider a system that consists of multiple subsystems indexed by $i \in \mathbf{N}$ that are connected in series. If any of the subsystems fails, the whole system fails. Furthermore, subsystem $i$ consists of multiple types of components, indexed by $j \in \mathbf{M}_i$. In particular, every subsystem adopts a mixed redundancy strategy. That is, components of each type $j$ can be either in cold-standby fashion or active-parallel fashion, which makes up the two subsets of $\mathbf{M}_i=\mathbf{M}^{\rm c}_i \cup \mathbf{M}^{\rm a}_i$, where $\mathbf{M}^{\rm c}_i$ and $\mathbf{M}^{\rm a}_i$ indicate the index sets for cold-standbys and active-parallels, respectively. {\color{red} A cold-standby component begins to work (and thus begins its lifetime) only when its predecessor fails, while the active-parallel components begin their lifetimes together.} Finally, each component of type $j$ in subsystem $i$ can have multiple redundant components, indexed by $t \in \mathbf{T}_{ij}$. The configuration of the series system with mixed redundancy strategy is illustrated in Figure~\ref{fig:mixed-redundancy}.
225
226
227
228\begin{figure}[htp]
229\centering
230\includegraphics[scale=0.65]{AC-configuration0819.pdf}
231\caption{\color{red} A series-parallel system of mixed redundancy strategy}\label{fig:mixed-redundancy}
\label{figure1}% NOTE(review): duplicate of \label{fig:mixed-redundancy} in the caption above; prefer a single label (kept in case it is referenced elsewhere)
233\end{figure}
234
235
We denote by $\tilde{z}_{ijt}$ the lifetime random variable of the $t$-th redundant component of type $j$ in subsystem $i$, and use binary variables $x_{ijt}$ to denote whether this component is used ($x_{ijt}=1$) or not ($x_{ijt}=0$), for $i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}$. Thus, the system lifetime can be expressed as follows:
237$$
238\min\limits_{i \in \mathbf{N}}\left(\sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} \tilde{z}_{ijt}x_{ijt} + \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}\tilde{z}_{ijt}x_{ijt}\right).
239$$
240Now, we assume that the lifetime distribution
241$$\bm{\tilde{z}}=\left(\tilde{z}_{ijt}\right)_{i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}} \sim \P$$ is known, and the redundancy allocation problem can be formulated as the following chance-constrained optimization problem:
242\begin{equation}\label{HP1-ambiguity-XY}
243\begin{array}{rcll}
244& \min\limits_{\x} & \sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[ \sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\right]c_{ij} \\[0.3 cm]
245&{\rm s.t.} & \displaystyle \P\left[\min\limits_{i \in \mathbf{N}}\left(\sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} \tilde{z}_{ijt}x_{ijt} + \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}\tilde{z}_{ijt}x_{ijt}\right)> \mathcal{T}_R \right]\ge R_{0} & \\[0.3 cm]
246&& L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\le U_{i}, \forall i \in \mathbf{N},\\
247&& \x \in \{0,1\}^{H},
248\end{array}
249\end{equation}
where $c_{ij}$ is the cost of each type-$j$ redundant component in subsystem $i$, $L_i$ and $U_i$ are the lower and upper limits on the number of redundant components in each subsystem $i$, $\mathcal{T}_R$ is the required lifetime lower bound, $R_0$ is the designed reliability level required to reach $\mathcal{T}_R$, and \begin{equation}
251H := \sum\limits_{i \in \mathbf{N}}\sum\limits_{j \in \mathbf{M}_i}|\mathbf{T}_{ij}|
252\end{equation}
253is the total number of possible components in the system.
254
It is noted in the base model that we assume the distribution $\mathbb{P}$ is exactly known for the lifetimes $\bm{\tilde{z}}$ of all components. However, as mentioned in the Introduction, it is often hard in practice to calibrate such a high-dimensional lifetime distribution due to the scarcity of sufficient lifetime data in many practical reliability applications of redundancy systems. Furthermore, although the lifetime (or time-to-failure) observations might not be sufficient, we do have many observations (even realtime observations) on the working conditions related to component lifetimes. Therefore, how to incorporate the possible {\it Side Information} to assist in learning the lifetime patterns and achieve a more reliable and economical redundancy allocation is critical, especially when lifetime data are insufficient. To this end, we propose in this work a {\it Distributionally Robust Optimization} model for the redundancy allocation that allows the lifetime distribution $\mathbb{P}$ to be ambiguous and is able to incorporate the possible valuable side information with learning. This will be discussed in detail in the forthcoming two sections.
256}
257
258\section{\color{blue}A Learning-based Distributionally Robust Redundancy Model}
259{\color{blue}In this section, we focus on the modeling of redundancy optimization problem with side information incorporated using the distributionally robust optimization technique. In particular, we first introduce the construction of the {\it Ambiguity Set} with side information knowledge established from the machine learning methods, {\it e.g.}, clustering (Section~\ref{subsec:ambiguityset}). We then develop the resulting distributionally robust chance-constrained model for the redundancy allocation and discuss its tractable reformulation (Section~\ref{subsec:DROmodel}). Finally, we design a {\it Cross Validation} method for choosing the best learning parameter~(Section~\ref{subsec:CV}).
260}
261
262
263
264
265
266\subsection{Constructing ambiguity set with clustering using side information}\label{subsec:ambiguityset}
267{\color{blue}
As mentioned in the Introduction, in many practical situations, the components of a given type (or even of different types) may fail due to several common causes, which can usually be reflected by side information such as producer information, temperature, working intensity, and maintenance, to name a few. In other words, these components, once equipped with the side information, can be readily grouped or clustered together, and the resulting clusters or groups should provide valuable information for our redundancy allocation decision. This motivates us to use the {\it Ambiguity Set} with conditional indicator variables ({\color{red}Chen~et~al.~2019}) to incorporate the distributional knowledge from the side information under ambiguity.}
269
270{\color{blue}
271In particular, we assume that the true distribution $\P$ of the lifetime random variables $\bm{\tilde{z}}$ is not exactly known or {\em ambiguous}, and only partial information of the lifetime distribution is available. Furthermore, we also assume that some side information related to component lifetimes is also available and we let an indicator random variable $\tilde{k}$ to model the possible clustering knowledge learned from both side information and lifetime observations (which will be explained in detail later). Formally, we define the following distributional ambiguity set $\mathbb{F}$ by incorporating the above mentioned distributional information conditional on the indicator variable $\tilde{k}$, within which the distribution $\P$ is allowed to vary:}
272\begin{equation}\label{ambiguity-set}
273\mathbb{F}_{K}:=\left\{\P \in \mathcal{P}\left( \mathbb{R}_+^H\times[K]\right) \left |
274\begin{array}{ll}
275(\tilde{\bm{z} }, \tilde{k })\sim \P \\[0.3 cm]
276\mathbb{E}_{\P}\Big[\tilde{z}_{ijt}~\Big\vert~\tilde{k}=k\Big]\in \left[\underline{\mu}^{k}_{ij}, \overline{\mu}^{k}_{ij} \right], & \forall k \in [K], i \in \mathbf{N}, j\in \mathbf{M}_i, t \in \mathbf{T}_{ij}\\ [0.3 cm]
277\mathbb{E}_{\P}\Big[ g_{lk}(\bm{\tilde{z}}) ~\Big |~ \tilde{k}=k \Big]\le 0, & \forall k \in [K], l \in \mathbf{L}_k\\[0.5 cm]
278\P\Big[\bm{\tilde{z}}\in \mathcal{Z}_k ~\Big |~ \tilde{k}=k \Big]=1, & \forall k \in [K]\\[0.3 cm]
279\P\Big[\tilde{k}=k\Big]=p_k, & \forall k \in [K]
280\end{array}\right. \right\}.
281\end{equation}
{\color{blue}In the above ambiguity set, $K$ is the number of clusters of lifetime patterns learned (which will be explained later, and the selection of the hyperparameter $K$ will be discussed in Section~\ref{subsec:CV}); the second set of constraints captures the expected lifetime range $[\underline{\mu}^{k}_{ij}, \overline{\mu}^{k}_{ij}]$ of each type $j$ of components in subsystem $i$ in cluster $k$; the fourth set of constraints captures the information of the conditional support set $\mathcal{Z}_k$ given each cluster $k$, where unless otherwise specified we let
283$\mathcal{Z}_k=\mathcal{Z}, \forall k \in [K]$ and
284\begin{equation}\label{equ:Z}
285\mathcal{Z}:= \Big\{\z \in \mathbb{R}^{H}| z_{ijt} \in [\underline{z}_{ij},\overline{z}_{ij}], \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij} \Big\}.
286\end{equation}
The last set of constraints represents the probability $p_k$ of $\bm{\tilde{z}}$ falling within each cluster, which can be estimated from the data points within each cluster. Finally, the third set of constraints
288\begin{equation}\label{cons:g}
289\mathbb{E}_{\P}\Big[ g_{lk}(\bm{\tilde{z}})~\Big |~ \tilde{k}=k \Big]\le 0, \quad \forall k \in [K], l \in \mathbf{L}_k,
290\end{equation}
291where each $g_{lk}(\bm{{z}})$ is a convex function of $\bm{{z}}$, are utilized to incorporate, flexibly, more distributional information ({\it e.g.}, correlation) per necessary, for which we illustrate by the following examples.
292\begin{example}[Marginal Variance]\label{ex:ambiguityset-2}
293If we define in \eqref{cons:g}
294\begin{equation}\label{equ:g-1}
g_{ijtk}({\bm{z}}):=\left(z_{ijt}-\nu^{k}_{ij} \right)^2 - S^{k}_{ij}, \forall k \in [K], i \in \mathbf{N}, j \in {\mathbf{M}_{i}}, t \in \mathbf{T}_{ij},
296\end{equation}
where $\nu^{k}_{ij}$ and $S^{k}_{ij}$ are the sample means and variance bounds estimated from the data of each cluster $k$ for each type $j$ in subsystem $i$, then we obtain the conditional marginal variance constraints:
298$$
\mathbb{E}_{\P}\left[\left. \left(\tilde{z}_{ijt}-\nu^{k}_{ij} \right)^2~\right|~ \tilde{k}=k \right]\le S^{k}_{ij}, \forall k \in [K], i \in \mathbf{N}, j\in \mathbf{M}_i, t \in \mathbf{T}_{ij}.
300$$
301\end{example}
302\begin{example}[Conditional Average Dispersion]\label{ex:ambiguityset-1}
303If we define
304\begin{equation}\label{equ:g-2}
305g_{ijk}({\bm{z}}):=\sum\limits_{t \in \mathbf{T}_{ij}}\left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| - \epsilon^{k}_{ij}, \forall k \in [K], i \in \mathbf{N}, j \in {\mathbf{M}_{i}},
306\end{equation}
307where $\nu^{k}_{ij}, \sigma^{k}_{ij}$ and $\epsilon^{k}_{ij}$ are the sample mean values, standard deviations and dispersion upper bound levels estimated from the data of each cluster $k$. This leads to the conditional average dispersion constraints:
308$$
309\mathbb{E}_{\P}\left[\left. \frac{1}{|\mathbf{T}_{ij} |}\sum\limits_{t \in \mathbf{T}_{ij}}\left|\frac{ \tilde{z}_{ijt}-\nu^{k}_{ij}}{\sigma^{k}_{ij}} \right| ~\right|~ \tilde{k}=k \right]\le \epsilon^{k}_{ij}, \forall k \in [K], i \in \mathbf{N}, j\in \mathbf{M}_i,\\
310$$
311which captures the modeling requirement that within each cluster $k$, the lifetimes of the same type of components in the same subsystem should be distributionally similar. We also point out that when $K=1$ ({\it i.e.}, only 1 cluster considered), the model of \eqref{equ:g-2} with $g_{ijk}({\bm{z}})$ reduces to the regular average dispersion constraints employed in Wang~et~al.~(2019).
312\end{example}
\begin{example}[Conditional Cross Variance]\label{ex:ambiguityset-3}
314If we define
315\begin{equation}\label{equ:g-3}
316g_{ik}({\bm{z}}):=\left(\sum\limits_{j \in {\mathbf{M}_{i}}}\sum\limits_{t \in \mathbf{T}_{ij}}\frac{z_{ijt}}{|\mathbf{T}_{ij}|}-\nu^{k}_{i} \right)^2 - S^k_{i}, \forall k \in [K], i \in \mathbf{N},
317\end{equation}
318where $\nu^{k}_{i}$ and $S^{k}_{i}$ are the estimated sample mean values and upper bound levels of variance of sum of component lifetimes in all types of each subsystem $i$ given cluster $k$. This leads to the conditional cross variance constraints:
319$$
320\mathbb{E}_{\P}\left[\left. \left(\sum\limits_{j \in {\mathbf{M}_{i}}}\sum\limits_{t \in \mathbf{T}_{ij}}\frac{z_{ijt}}{|\mathbf{T}_{ij}|}-\nu^{k}_{i} \right)^2 ~\right|~ \tilde{k}=k \right]\le S^k_{i}, \forall k \in [K], i \in \mathbf{N},
321$$
322which captures the possible correlations among the lifetimes of different types of components.
323\end{example}
324
325
326
327Finally, we elaborate on how the ambiguity set model $\mathbb{F}_K$ can incorporate, in a flexible fashion, the information of lifetime patterns extracted using machine learning approaches of clustering (Hastie~et~al.~2009). }
328
329{\color{blue}
{\it Direct clustering on Lifetime Sample.} We first point out that even without side information, it is possible to identify more valuable component lifetime patterns by clustering directly over the lifetime sample, and we lose nothing by simply setting $K=1$. For instance, we can use the {\it K-means} algorithm, which has proven effective and is one of the most popular unsupervised clustering algorithms. A modeling and computational advantage of using K-means for our DRO redundancy allocation model is that the resulting partition of the support $\mathcal{Z}$, which is also called a {\it Voronoi Tessellation} (Hastie~et~al.~2009), forms naturally different polyhedral conditional support subsets:
331\begin{equation}\label{k-means-classifier}
332\mathcal{Z}_k := \Big\{\bm{z} \in \mathcal{Z}\:\big|\: 2(\hmu_i - \hmu_k)^{\top}\bm{z} \leq \hmu_{i}^{\top}\hmu_{i} - \hmu_{k}^{\top}\hmu_{k}, \forall i \in [K] \Big\}, \forall k \in [K],
333\end{equation}
where $\hmu_k$ is the mean of cluster $\mathcal{Z}_k$, which is also an output of the K-means algorithm. These conditional support subsets $\mathcal{Z}_k$ in \eqref{k-means-classifier} not only partition the original support (and therefore provide more effective conditional support sets), but also enjoy the computationally viable geometry of polyhedra. More details on K-means or clustering implementation issues, {\it e.g.}, using some tie-breaking rules to ensure that each data point belongs to only one cluster, can be found in {\color{red}the work of MacQueen (1967)}.
335}
336
337
338{\color{red}
339{\it Clustering with Dimension Reduction.} In some situations, it may be beneficial to incorporate dimension reduction into the clustering process. In complex systems with very large number of components, dimension reduction may speed up the clustering process. More importantly, if the lifetime ranges of different components vary to a great extent or if the lifetime of components correlate with each other, Euclidean distance on the raw data may not be a suitable distance metric to measure the similarity of data points. Dimension reduction is likely to reveal the underlying structure of the data, and provide a better distance metric, thus resulting in better clusters. There are many well-developed dimension reduction algorithms to be employed. Principle Component Analysis (PCA) is a popular linear dimension reduction algorithm that can execute very fast. t-Distributed Stochastic Neighbor Embedding (t-SNE) is a slower algorithm, but is particularly well-suited to reduce very high dimensional data into low dimensions like 2 or 3. Spectral Clustering is a technique that can combine a standard clustering method (like K-means) and dimension reduction seamlessly together (Ng et al. 2002). }
340
341{\color{blue}
{\it Clustering based on Side Information.} As we mentioned in the Introduction, in many situations, the data collected on system redundancy design contain not only the samples of component lifetimes, but also the {\it covariates} of side information such as producer information, working conditions, and time to last maintenance. Such side information should be helpful for characterizing the lifetime patterns of the components, especially in the scarcity of historical lifetime data; see the following Example~\ref{ex:sideinformation}.
343\begin{example}[value of side information]\label{ex:sideinformation}
\color{red} Consider an example of a simple railway system consisting of two lines, the North Line (case 0) and the South Line (case 1). The climate alongside the lines is vastly different, which affects the expected lifetime of the braking system on the train. Considering the braking system as a single component, it has an expected lifetime of 1 year operating on the North Line and 3 years on the South Line ($\mathbb{E}_{\P}\Big[\tilde{z}~\Big\vert~\tilde{k}=0\Big] = 1, \mathbb{E}_{\P}\Big[\tilde{z}~\Big\vert~\tilde{k}=1\Big] = 3$). The two lines have the same amount of traffic ($p_0 = p_1 = 0.5$). Without side information, we are able to construct the following ambiguity set, using only the mean information:
345
346
347\begin{equation}\label{ambiguity-set-foo}
348\mathbb{F}:=\left\{\P \in \mathcal{P}\left( \mathbb{R}_+\right) \left|
349\begin{array}{ll}
350\tilde{z}\sim \P \\[0.3 cm]
\mathbb{E}_{\P}\Big[\tilde{z}\Big]\in \left[1.6, 2.4\right]\\ [0.3 cm]
352\P\Big[{\tilde{z}}\in \mathcal{Z}\Big]=1
353\end{array}\right. \right\}.
354\end{equation}
355
356While using side information, the conditional ambiguity set is:
357
358\begin{equation}\label{ambiguity-set-bar}
359\mathbb{F}_{K}:=\left\{\P \in \mathcal{P}\left( \mathbb{R}_+\times[K]\right) \left |
360\begin{array}{ll}
361(\tilde{z}, \tilde{k })\sim \P \\[0.3 cm]
362\mathbb{E}_{\P}\Big[\tilde{z}~\Big\vert~\tilde{k}=0\Big]\in \left[0.8, 1.2\right] \\ [0.3 cm]
363\mathbb{E}_{\P}\Big[\tilde{z}~\Big\vert~\tilde{k}=1\Big]\in \left[2.4, 3.6\right] \\ [0.3 cm]
364\P\Big[\tilde{z}\in \mathcal{Z}_0 ~\Big |~ \tilde{k}=0 \Big]=1\\[0.3 cm]
365\P\Big[\tilde{z}\in \mathcal{Z}_1 ~\Big |~ \tilde{k}=1 \Big]=1\\[0.3 cm]
366\P\Big[\tilde{k}=0\Big]=0.5\\[0.3cm]
367\P\Big[\tilde{k}=1\Big]=0.5\\[0.3cm]
368\end{array}\right. \right\}.
369\end{equation}
370
Note that the lower and upper bounds of the mean are constructed by shrinking or enlarging the mean by 20\%.
372
373It is clear that the conditional ambiguity set describes the data set better. While the lifetime distributions under different scenarios may not be so distinctive from each other in practice, with the introduction of side information we can incorporate much more information into the ambiguity set, and better describe the structure of the data set.
374
375\end{example}
376
Therefore, we can extract the lifetime pattern information by clustering collectively both the lifetime data and the side information, using suitable machine learning approaches, such as K-means clustering, hierarchical clustering ({\color{red}Sibson 1973}) and regression trees ({\color{red}Quinlan 1986}), as convenient. We can therefore form a number of clusters or scenarios $k\in [K]$ of the component lifetime patterns and achieve a more accurate description for the component lifetimes using the statistical information within each cluster (scenario).
378}
379
380{\color{blue} In the next section, we discuss the distributionally robust redundancy allocation model with ambiguity set $\mathbb{F}_K$ given the number $K$ of clusters, and derive its tractable reformulations.}
381
382
383
384\subsection{The model with tractable reformulation}\label{subsec:DROmodel}{\color{blue}
385With the ambiguity set $\mathbb{F}_K$ of lifetime distributions of all components, we now consider the worst-case probabilistic constraint for safeguarding the reliability level $R_0$, {\it i.e.},
386$$
\inf\limits_{\P \in \mathbb{F}_{K}}\P\left[\min\limits_{i \in \mathbf{N}}\left(\sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} \tilde{z}_{ijt}x_{ijt} + \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}\tilde{z}_{ijt}x_{ijt}\right)> \mathcal{T}_R \right]\ge R_{0}
388$$
389in the original redundancy allocation problem~\eqref{HP1-ambiguity-XY}. This leads to the following {\em distributionally robust redundancy optimization (DRRO) model}:
390\begin{equation}\label{HP1-ambiguity-X}
391\begin{array}{rcll}
392& \min\limits_{\x} & \sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[ \sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\right]c_{ij} \\[0.3 cm]
&{\rm s.t.} & \displaystyle \inf\limits_{\P \in \mathbb{F}_{K}}\P\left[\min\limits_{i \in \mathbf{N}}\left(\sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} \tilde{z}_{ijt}x_{ijt} + \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}\tilde{z}_{ijt}x_{ijt}\right)> \mathcal{T}_R \right]\ge R_{0} & \\[0.3 cm]
394&& L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\le U_{i}, \forall i \in \mathbf{N},\\
395&& \x \in \{0,1\}^H.
396\end{array}
397\end{equation}
398In the above model, the distributionally robust chance constraint essentially ensures to achieve the required reliability level $R_{0}$ over all the qualified probability distributions $\P \in \mathbb{F}_K$.
399
{\color{red}It can be shown that the conditional ambiguity set ($\mathbb{F}_{K}$) is tighter than the ambiguity set constructed without clustering (which can also be considered as the case when $K = 1$, denoted by $\mathbb{F}_{1}$). Therefore, by incorporating cluster information, we can achieve a better result in solving the problem. We present a proof for the case of the marginal variance constraints in Example~\ref{ex:ambiguityset-2}. In the following proposition, parameters with subscript or superscript $k$ are parameters in $\mathbb{F}_K$, while the parameters without them are parameters in $\mathbb{F}_1$. $P_K$ is the program \eqref{HP1-ambiguity-X} where $\P \in \mathbb{F}_{K}$, with optimal value $c^*_K$, and $P_1$ is the program \eqref{HP1-ambiguity-X} where $\P \in \mathbb{F}_{1}$, with optimal value $c^*_1$.
401\begin{proposition}
Given ambiguity sets $\mathbb{F}_{K}$ and $\mathbb{F}_{1}$, assuming that $\underline{\hmu} = \sum\limits_{k \in [K]}p_k\underline{\hmu}_{k}$, $\overline{\hmu} = \sum\limits_{k \in [K]}p_k\overline{\hmu}_{k}$, $\hmu_k = \frac{\underline{\hmu}_{k} + \overline{\hmu}_{k}}{2}$, $S_{ij}^2 + \mu_{ij}^2 = \sum\limits_{k \in [K]}p_k({S_{ij}^k}^2 + {\mu_{ij}^k}^2), \forall i \in \mathbf{N}, j \in \mathbf{M}_i$, then $\mathbb{F}_{K} \subseteq \mathbb{F}_{1}$, and the optimal values satisfy $c^*_K \leq c^*_1$.
403\end{proposition}
{\bf Proof.} First, we prove that every $\P \in \mathbb{F}_{K}$ also belongs to $\mathbb{F}_{1}$.
405%\begin{eqnarray}
406$$
407\begin{aligned}
408\mathbb{E}_{\P}\Big(\tilde{z}_{ijt}\Big) \geq \sum\limits_{k \in [K]}p_k\underline{\hmu}_{ij}^{k} &= \underline{\mu}_{ij} & \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}\\
409\mathbb{E}_{\P}\Big(\tilde{z}_{ijt}\Big) \leq \sum\limits_{k \in [K]}p_k\overline{\hmu}_{ij}^{k} &= \overline{\mu}_{ij} & \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}\\
\mathbb{E}_{\P}\Big((\tilde{z}_{ijt} - \mu_{ij})^2 \Big) &= \sum\limits_{k \in [K]}p_k\mathbb{E}_{\P}\Big((\tilde{z}_{ijt} - \mu_{ij}^k)^2 | \tilde{k} = k\Big) & \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}\\
&= \sum\limits_{k \in [K]}p_k\mathbb{E}_{\P}\Big((\tilde{z}_{ijt})^2 | \tilde{k} = k\Big) - \mu_{ij}^2&\\
&\leq \sum\limits_{k \in [K]}p_k\mathbb{E}_{\P}\Big({S_{ij}^k}^2 + {\mu_{ij}^k}^2 | \tilde{k} = k \Big) - \mu_{ij}^2&\\
413&= S_{ij}^2 + {\mu_{ij}}^2 - \mu_{ij}^2&\\
414&= S_{ij}^2&
415\end{aligned}
416$$
417%\end{eqnarray}
418
Moreover, since $\mathcal{Z}_k \subseteq \mathcal{Z}$ by definition, $\bigcup\limits_{k \in [K]}\mathcal{Z}_k \subseteq \mathcal{Z}$; combined with $\P\Big[\bm{\tilde{z}}\in \mathcal{Z}_k ~\Big |~ \tilde{k}=k \Big]=1$ for all $k \in [K]$, this yields $\P[\bm{\tilde{z}} \in \mathcal{Z}] = 1$. Hence, $\P \in \mathbb{F}_{1}$.
420
Since $\P \in \mathbb{F}_{1}$ for every $\P \in \mathbb{F}_{K}$, we have $\mathbb{F}_{K} \subseteq \mathbb{F}_{1}$. Therefore any feasible solution to $P_1$ is also a feasible solution to $P_K$, and since $c^*_K$ and $c^*_1$ are the optimal values of a minimization problem, $c^*_K \leq c^*_1$. \blot
422
The conditions in the proposition are used to require that the moment specifications be consistent.
424}
425
426
427
Next, we derive a tractable formulation of the DRRO model \eqref{HP1-ambiguity-X}. Without loss of generality, we assume that $\mathbf{M}^{\rm a}_{i} \neq \emptyset, \forall i \in \mathbf{N}$. The case where $\mathbf{M}^{\rm a}_{i} = \emptyset$ for some $i \in \mathbf{N}$, by our forthcoming discussions, can be treated similarly and is actually technically easier. Also, we illustrate our major results using
429$$
430g_{ijk}({\bm{z}}):=\sum\limits_{t \in \mathbf{T}_{ij}}\left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| - \epsilon^{k}_{ij}, \forall k \in [K], i \in \mathbf{N}, j \in {\mathbf{M}_{i}}
431$$
432in the ambiguity set $\mathbb{F}_K$, and consider the conditional support set with K-means
433\begin{equation}\label{equ:W}
434\mathcal{Z}_k = \Big\{\bm{z} \in \mathcal{Z}\:\big|\: 2(\hmu_i - \hmu_k)^{\top}\bm{z} \leq \hmu_{i}^{\top}\hmu_{i} - \hmu_{k}^{\top}\hmu_{k}, \forall i \in [K] \Big\}, \forall k \in [K]
435\end{equation}
436where $\mathcal{Z}$ is the original support set given by \eqref{equ:Z}.
437
438
439We begin by focusing on the computation of the following worst-case reliability function in the problem (\ref{HP1-ambiguity-X}) given system design $\x$:
440\begin{equation}\label{Prob-1}
441\displaystyle \inf\limits_{\P \in \mathbb{F}_K}\P\left[\min\limits_{i \in \mathbf{N}}\left(\sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} \tilde{z}_{ijt}x_{ijt} + \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}\tilde{z}_{ijt}x_{ijt}\right)> \mathcal{T}_R \right].
442 \end{equation}
443For a better exposition of our approach, we denote by
444\begin{equation}\label{constraint-set}
\mathcal{Z}_i(\x):=\left\{\bm{z} \in \mathbb{R}^H_+ ~\left|~ \sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} z_{ijt}x_{ijt} + \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}z_{ijt}x_{ijt} > \mathcal{T}_R \right.\right \}, \forall i \in \mathbf{N}.
446\end{equation}
447
Since $\mathbf{M}^{\rm a}_{i} \neq \emptyset, \forall i \in \mathbf{N}$, by the nature of the maximum, it is clear that
449\begin{equation}\label{constraint-set-2}
\mathcal{Z}_i(\x) = \bigcup\limits_{{j} \in \mathbf{M}^{\rm a}_i}\bigcup\limits_{t \in \mathbf{T}_{i{j}}}\left\{\bm{z} \in \mathbb{R}^H_+ ~\left|~ \sum_{{j'} \in \mathbf{M}^{\rm c}_i}\sum_{t'\in \mathbf{T}_{i{j'}}} z_{i{j'}{t'}}x_{i{j'}{t'}} + z_{i{j}{t}}x_{i{j}{t}} > \mathcal{T}_R \right.\right \}, \forall i \in \mathbf{N}.
451\end{equation}
Then the complementary set of $\mathcal{Z}_i(\x)$ for each $i \in \mathbf{N}$, denoted by $\overline{\mathcal{Z}}_i(\x)$, is
453\begin{equation}\label{set-LT}
\overline{\mathcal{Z}}_i(\x)=\bigcap\limits_{{j} \in \mathbf{M}^{\rm a}_i}\bigcap\limits_{t \in \mathbf{T}_{i{j}}}\left\{\bm{z} \in \mathbb{R}^H_+ ~\left|~ \sum_{{j'} \in \mathbf{M}^{\rm c}_i}\sum_{t'\in \mathbf{T}_{i{j'}}} z_{i{j'}{t'}}x_{i{j'}{t'}} + z_{i{j}{t}}x_{i{j}{t}}\leq \mathcal{T}_R \right.\right \}, \forall i \in \mathbf{N}.
455\end{equation}
456The following result establishes an equivalent formulation of regular robust optimization for the above worst-case reliability function~\eqref{Prob-1}.
457
458\begin{lemma}\label{lem1-LT}
459Given system design $\x$, the worst-case reliability function (\ref{Prob-1}) is equivalent to the optimal value of the following optimization problem:
460\begin{equation}\label{P1-ambiguity-sup-dual1}
461\begin{array}{rcll}
462&\!\!\!\!\!\!\!\!\!\!\!\! \max\limits_{\bm{\alpha}, \bm{\beta}, \bm{\lambda}, \bm{\tau}} & 1-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij} + \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right)-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} - \sum\limits_{k\in[K]}\tau_{k} \\[0.3 cm]
463&\!\!\!\!\!\!\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge{p_k}, \forall \bm{z} \in \mathcal{Z}_k\cap \overline{\mathcal{Z}}_i(\x), i \in \mathbf{N}, k \in [K]\\
464&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ij{t}}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge 0, \forall \bm{z} \in \mathcal{Z}_k, k\in [K]\\
465&& \halpha \le \mathbf{0}, \hbeta, \hlambda \ge \mathbf{0}, \bm{\tau} \in \mathbb{R}^K.
466\end{array}
467\end{equation}
468
469\end{lemma}
470
471%\begin{proof}
472{\bf Proof.}
473With the notation in (\ref{constraint-set}), the worst-case probabilistic chance function (\ref{Prob-1}) can be rewritten in terms of the probability of its complementary event:
474\begin{equation}\label{1minus}
475\inf\limits_{\P\in \mathbb{F}_{K}} \P\Big[\tilde{\bm{z} }\in \mathcal{Z}_i(\x),\forall i \in \mathbf{N} \Big]=1-\sup\limits_{\P \in \mathbb{F}_K } \P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big].
476\end{equation}
477
Given the probability distribution of $\tilde{k}$ as
479$$
480\P\Big[\tilde{k}=k\Big]=p_k, \forall k \in [K].
481$$
482We now define $\P_k$ as the conditional distribution of $\bm{\tilde{z}}$ given $\tilde{k}=k$ for $k \in [K]$, we then can decompose any distribution $\P \in \mathbb{F}_K$ using $\{\P_k, k\in [K]\}$ and rewrite the worst-case chance
483$$
484\sup\limits_{\P \in \mathbb{F}_K } \P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]
485$$
486using total probability law as following formulation:
487\begin{eqnarray}
488\sup\limits_{\P \in \mathbb{F}_K } \P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]
489&=&\sup\limits_{\P_k, \forall k\in[K]}\sum\limits_{k\in[K]}p_k\P_k\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]\label{P1-ambiguity-sup} \\[0.2 cm]
490&=&\sup\limits_{\P_k}\sum\limits_{k\in[K]}\displaystyle \int_{\cup_{i\in \mathbf{N} }\left\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\right\}}p_k {\rm d}\P_k(\bm{\tilde{z}}) \\[0.2 cm]
491&&{\rm s.t.}\displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} \tilde{z}_{ijt} {\rm d}\P_k(\bm{\tilde{z}}) \ge \underline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in [K]\\ [0.2 cm]
492&&~\quad \displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} \tilde{z}_{ijt} {\rm d}\P_k(\bm{\tilde{z}}) \le \overline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij},k\in [K]\\ [0.2 cm]
493&&~\quad \displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} \sum\limits_{t\in \mathbf{T}_{ij}}\left|\frac{\tilde{z}_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| {\rm d}\P_k(\bm{\tilde{z}}) \le \epsilon^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i,k\in [K]\\[0.2 cm]
494&&~\quad \displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} {\rm d}\P_k(\bm{\tilde{z}}) =1, \forall k\in[K], \label{P1-ambiguity-sup-2}
495\end{eqnarray}
496where the support $\mathcal{Z}_k$ is given by (\ref{equ:W}). The Lagrange dual of above moment problem \eqref{P1-ambiguity-sup}-\eqref{P1-ambiguity-sup-2} has the following formulation (Wiesemann~et al.~2014):
497\begin{equation}\label{P1-ambiguity-sup-dual0}
498\begin{array}{rcl}
499&\!\!\!\!\!\!\!\!\!\!\!\! \min\limits_{\bm{\alpha}, \bm{\beta}, \bm{\lambda}, \bm{\tau}} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij}+ \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right)+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} + \sum\limits_{k\in[K]}\tau_{k} \\[0.3 cm]
500&\!\!\!\!\!\!\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge p_k \mathbb{I}\Big({\bigcup\limits_{i\in \mathbf{N} }\left\{ \bm{z} \in \overline{\mathcal{Z}}_i\right\}} \Big), \forall \bm{z} \in \mathcal{Z}_k, k \in [K]\\
501&& \bm{\alpha} \le \mathbf{0}, \bm{\beta}, \bm{\lambda} \ge \mathbf{0}, \bm{\tau} \in \mathbb{R}^K,
502\end{array}
503\end{equation}
504where $\mathbb{I}(\mathcal{A})$ is the indicator function with respect to set $\mathcal{A}$, and $(\halpha, \hbeta, \hlambda, \htau)$ are the dual variables associated with the constraints of the primal problem \eqref{P1-ambiguity-sup}-\eqref{P1-ambiguity-sup-2}.
505
506
507
Furthermore, we show that strong duality holds. Since ${\mu^{k}_{ij}}$ is the conditional expectation of $\tilde{z}_{ijt}$, we can always find a Dirac probability distribution $\P^{\dag}_{\bm{\mu}}$ with $\underline{\hmu}<\hmu<\overline{\hmu}$, which is a relative interior point of the feasible set of problem \eqref{P1-ambiguity-sup}-\eqref{P1-ambiguity-sup-2}. Therefore, the Slater condition holds, and then the optimal value of (\ref{P1-ambiguity-sup-dual0}) is equal to that of problem \eqref{P1-ambiguity-sup}-\eqref{P1-ambiguity-sup-2}.
509
510
511
512
513
Next, expanding the indicator function $\mathbb{I}\left({\cup_{i\in \mathbf{N} }\left\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\right\}} \right)$ for different cases of $\bm{z}$, the above problem (\ref{P1-ambiguity-sup-dual0}) is also equivalent to the following formulation:
515\begin{equation}\label{P1-ambiguity-sup-dual00}
516\begin{array}{rcll}
517&\!\!\!\!\!\!\!\!\!\!\!\! \min\limits_{\bm{\alpha}, \bm{\beta}, \bm{\lambda}, \bm{\tau}} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij}+ \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right)+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} + \sum\limits_{k\in[K]}\tau_{k} \\[0.3 cm]
518&\!\!\!\!\!\!\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall \bm{z} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_i, i \in \mathbf{N}, k \in [K]\\
519&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge0, \forall \bm{z} \in \mathcal{Z}_k, k \in [K]\\\nonumber
520&& \halpha \le \mathbf{0}, \hbeta, \hlambda \ge \mathbf{0}, \bm{\tau} \in \mathbb{R}^K.
521\end{array}
522\end{equation}
Finally, plugging this formulation into the equation (\ref{1minus}), we arrive at the formulation of (\ref{P1-ambiguity-sup-dual1}) whose optimal objective value is exactly the worst-case value of the probabilistic chance function (\ref{Prob-1}). The proof is completed. \blot
524%\end{proof}
525
It is noted that the derived optimization problem (\ref{P1-ambiguity-sup-dual1}) in its current form still belongs to the class of semi-infinite optimization problems, which are not directly computable. In the following, we show that by a duality argument the problem can be further transformed into a computationally tractable formulation of a linear program.
527
528\begin{proposition}\label{P-proposition1b}
Given a system design $\x$, the worst-case reliability function (\ref{Prob-1}) equals the optimal value of the following linear program (LP):
530\begin{eqnarray}
531&\!\!\!\!\!\! \max & 1-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij} + \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right) - \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} - \sum\limits_{k\in[K]}\tau_{k} \label{HP1-ambiguity-LP-FL} \\
532 &\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \Big[\phi^{lk}_{ijt}\underline{z}_{ij}+\varphi^{lk}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} \right)} \nonumber\\
533 &&+ \sum\limits_{n \in [K]}\psi^{lk}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2)\Big] + \sum\limits_{j\in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{lj}} q^{lk}_{jt}\mathcal{T}_R+ \tau_k \geq p_k, \forall l \in \mathbf{N}, k \in [K] \label{HP1-ambiguity-LP-FL1}\\
534 && \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \Big[\rho^{k}_{ijt}\underline{z}_{ij}+\varrho^{k}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\gamma^{k}_{ijt}-\theta^{k}_{ijt} \right)} \nonumber\\ && + \sum\limits_{n \in [K]}\varsigma^{k}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2) \Big] + \tau_k \geq0, \forall k \in [K]\label{HP1-ambiguity-LP-FL1-2}\\
535&& {q^{lk}_{jt}}x_{l jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} \nonumber \\ && = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm a}_l, t \in \mathbf{T}_{l j}, k \in [K] \\[0.3 cm]
536&& x_{l jt}\sum\limits_{j \in \mathbf{M}^{\rm a}_l}\sum\limits_{t \in \mathbf{T}_{l j}}{q^{lk}_{jt}} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} \nonumber \\ && = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm c}_l, t \in \mathbf{T}_{l j}, k \in [K] \\[0.3 cm]
537&& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\psi^{lk}_{n}+ \phi^{lk}_{ijt}+\varphi^{lk}_{ijt}+\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt},\nonumber\\ && ~ \forall l \in \mathbf{N}, i \in \mathbf{N}\setminus\{l \}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K] \label{HP2-ambiguity-LP-FL2} \\
538&&{|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\pi^{lk}_{ijt}+\varpi^{lk}_{ijt}) =\lambda^{k}_{ij}, ~ \forall l \in \mathbf{N}, i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K] \\
539&& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\varsigma^{k}_{n}+ \rho^{k}_{ijt}+\varrho^{k}_{ijt}+\gamma^{k}_{ijt}-\theta^{k}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt},\nonumber\\ && ~ \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K] \\
540&& {|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\gamma^{k}_{ijt}+\theta^{k}_{ijt}) =\lambda^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K] \\
541%&& q_{l jk}\le y_{{l jk}}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
542%&& y_{{l jk}} \ge M x_{l jk}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
543%&& y_{{l jk}} \le q_{l jk }+(x_{l jk}-1)M, ~\forall l \in \mathbf{N}, j \in \mathcal{J}({l}), k \in \mathcal{N}(l,j)\\[0.3 cm]
544%&& \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\ge L_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i \\
545&& \halpha,\q, \hpsi, \hvarphi, \hvarsigma, \hvarrho \le \mathbf{0}, \htau \in \mathbb{R}^K, \\
546&& \hbeta, \hlambda, \hphi, \hrho, \hpi,\hvarpi, \hgamma, \htheta \ge \mathbf{0}, \label{HP2-ambiguity-LP-FL}
547\end{eqnarray}
548where $\halpha, \hbeta, \hlambda, \htau, \q, \hpsi, \hphi, \hvarphi, \hpi, \hvarpi, \hrho, \hvarrho, \hvarsigma, \hgamma, \htheta$ are auxiliary variables.
549\end{proposition}
550
551%\begin{proof}
552{\bf Proof. }
First of all, for a given $l \in \mathbf{N}$ we deal with the infinitely many constraints
554$$
555\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall \bm{z} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l, k \in [K].
556$$
557Recall that
558\begin{equation}
559\overline{\mathcal{Z}}_l(\x)=\bigcap\limits_{{j} \in \mathbf{M}^{\rm a}_l}\bigcap\limits_{{t} \in \mathbf{T}_{l{j}}}\left\{\bm{z} \in \mathbb{R}^H_+ ~\left|~ \sum_{{j} \in \mathbf{M}^{\rm c}_l}\sum_{{t}\in \mathbf{T}_{l{j}}} \tilde{z}_{l{j}{t}}x_{l{j}{t}} + \tilde{z}_{l{j}{t}}x_{l{j}{t}}\leq \mathcal{T}_R \right.\right \}
560\end{equation}
561\begin{equation}
562=\left\{\bm{z} \in \mathbb{R}^H_+ ~\left|~ \sum_{j \in \mathbf{M}^{\rm c}_l}\sum_{{t}\in \mathbf{T}_{l{j}}} \tilde{z}_{l{j}{t}}x_{l{j}{t}} + \tilde{z}_{l{j}{t}}x_{l{j}{t}}\leq \mathcal{T}_R, \forall j \in \mathbf{M}^{\rm a}_l, {t}\in \mathbf{T}_{l{j}} \right.\right \},
563\end{equation}
564and
565$$
566\mathcal{Z}_k = \left\{\bm{z} \in \mathbb{R}^{H} \left| \begin{array}{l}
567 2(\hmu_i - \hmu_k)^{\top}\bm{z} \leq \hmu_{i}^{\top}\hmu_{i} - \hmu_{k}^{\top}\hmu_{k}, \forall i \in [K] \\[0.25 cm]
568 z_{ijt} \in [\underline{z}_{ij},\overline{z}_{ij}], \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}
569 \end{array}
570\right.\right\}
571$$
572for any $k \in [K]$.
573
574First of all, we claim that for any $k \in [K]$
575\begin{equation}\label{Lifting-1}
576\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall \bm{z} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l
577\end{equation}
578is equivalent to
579\begin{equation}\label{Lifting-2}
580\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + u^{k}_{ijt}\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall (\bm{z},\u) \in \mathcal{W}_k,
581\end{equation}
582{\color{red} where
583$$
584\mathcal{W}_k := \left\{(\z, \u) \middle| \: \z \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l, \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| \leq u^{k}_{ijt}, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij} \right\}, \forall k \in [K].
585$$
586}
587In fact, on the one hand, if \eqref{Lifting-1} holds, since $\hlambda \geq \mathbf{0}$, we have for any $(\bm{z}, \u) \in \mathcal{W}_k$
588$$
589u^{k}_{ijt}\lambda^{k}_{ij} \geq \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}.
590$$
591Therefore \eqref{Lifting-2} holds. On the other hand, assume \eqref{Lifting-2} holds, then for any $\hat{\bm{z}} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l$, we take
592$$
593u^{k}_{ijt} = \left|\frac{ \hat{z}_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}.
594$$
595Then we have
596$$
597\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[\hat{z}_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ \hat{z}_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k
598=\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[\hat{z}_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + u^{k}_{ijt}\lambda^{k}_{ij}\right] + \tau_k \geq p_k.
599$$
600Therefore \eqref{Lifting-1} holds, and \eqref{Lifting-1} and \eqref{Lifting-2} are equivalent.
601
602Then, by introducing auxiliary variables $u^{k}_{ijt}, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}$, we can equivalently lift the above constraints into the following optimization-based formulation:
603\begin{equation}\label{H-system1}
604\left.\begin{array}{rcll}
605 p_k-\tau_k\le & \min\limits_{\bm{z}, \u} & \displaystyle \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + u^{k}_{ijt} \lambda^{k}_{ij}\right] \\[0.3 cm]
606&{\rm s.t.} & \displaystyle {\sum_{j \in \mathbf{M}^{\rm c}_l}\sum_{{t}\in \mathbf{T}_{l{j}}} \tilde{z}_{l{j}{t}}x_{l{j}{t}} + \tilde{z}_{l{j}{t}}x_{l{j}{t}}\leq \mathcal{T}_R} & \forall {j} \in \mathbf{M}^{\rm a}_l, {t}\in \mathbf{T}_{l{j}} \\[0.3 cm]
607&& {|\mathbf{T}_{ij} |\sigma^{k}_{ij}}u^{k}_{ijt}- { z_{ijt}} \ge { -\nu^{k}_{ij}}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
608&& {|\mathbf{T}_{ij} |\sigma^{k}_{ij}}u^{k}_{ijt}+ { z_{ijt}} \ge {\nu^{k}_{ij} }, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
609&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} 2(\mu^{n}_{ijt} - \mu^{k}_{ijt})z_{ijt}\\[0.3 cm]
610&& \leq \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} {(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2, & \forall n \in [K]\\[0.3 cm]
611&& \underline{z}_{ij} \leq z_{ijt} \leq \overline{z}_{ij} & \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}\\[0.3cm]
612&& u^{k}_{ijt} \in \mathbb{R}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}.
613\end{array}\right\}, \forall k \in [K]
614\end{equation}
615
616By the strong duality of linear programming, the above constraint is also equivalent to the following system: for all $k$ in $[K]$,
617\begin{equation*}
618\left\{ \begin{array}{rl}
619& p_k-\tau_k\le \sum\limits_{j\in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{lj}} q^{lk}_{jt}\mathcal{T}_R\\[0.3 cm]
620& +\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[\phi^{lk}_{ijt}\underline{z}_{ij}+\varphi^{lk}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} \right)} + \sum\limits_{n \in [K]}\psi^{lk}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2)\right] \\[0.3 cm]
621& {q^{lk}_{jt}}x_{l jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} \nonumber \\ & = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall j\in \mathbf{M}^{\rm a}_l, t \in \mathbf{T}_{l j} \\[0.3 cm]
622& x_{ljt}\sum\limits_{j \in \mathbf{M}^{\rm a}_l}\sum\limits_{t \in \mathbf{T}_{l j}}{q^{lk}_{jt}} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{k}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} \nonumber \\ & = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall j\in \mathbf{M}^{\rm c}_l, t \in \mathbf{T}_{l j} \\[0.3 cm]
623& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\psi^{lk}_{n}+ \phi^{lk}_{ijt}+\varphi^{lk}_{ijt}+\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt}, ~ \forall i \in \mathbf{N}\setminus\{l\}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
624&{|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\pi^{lk}_{ijt}+\varpi^{lk}_{ijt}) =\lambda^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
625& q^{lk}_{jt}\le 0, ~\forall j\in \mathbf{M}^{\rm a}_l, t\in \mathbf{T}_{lj}\\& \psi^{lk}_{n} \le 0, \phi^{lk}_{ijt} \geq 0, \varphi^{lk}_{ijt} \leq 0, \pi^{lk}_{ijt}\ge 0,\varpi^{lk}_{ijt}\ge 0, ~\forall n \in [K], i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}.
626\end{array}
627\right\}
628\end{equation*}
629
630
631
632
633Likewise, the constraints
634$$
635\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge0, \forall \bm{z} \in \mathcal{Z}_k, k \in [K]\\
636$$
can also be dualized similarly. Leveraging the derived formulation (\ref{P1-ambiguity-sup-dual1}) in Lemma~\ref{lem1-LT}, we can arrive at the formulation of the linear program (\ref{HP1-ambiguity-LP-FL})--(\ref{HP2-ambiguity-LP-FL}). We are done. \blot
638%\end{proof}
639
640
Leveraging the derived reformulation (\ref{HP1-ambiguity-LP-FL})--(\ref{HP2-ambiguity-LP-FL}) of the linear program, we can readily linearize the
bilinear terms $q^{lk}_{jt}x_{l jt}$ in the overall DRRO model~\eqref{HP1-ambiguity-X} using standard MIP techniques. This results in the following mixed integer linear program (MILP) reformulation for the DRRO model~\eqref{HP1-ambiguity-X}. In particular, the advantage of the resulting MILP reformulation is that it does not increase the number of integer variables.
643
644
645\begin{proposition}\label{proposition1b}
646The problem (\ref{HP1-ambiguity-X}) is equivalent to the following mixed integer linear program:
647\begin{eqnarray}
648 & \min\limits_{\x} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \left[\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\right]c_{ij} \label{HP1-ambiguity-MILP-FL1}\\
649 &{\rm s.t.} & 1-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij}+ \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right)\nonumber\\
650 && -\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} - \sum\limits_{k\in[K]}\tau_{k}\ge R_{0} \\
651 && L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\le U_{i}, ~ \forall i \in \mathbf{N} \\
652%&& \sum\limits_{j\in \mathbf{M}_{l}} \sum\limits_{k\in \mathbf{K}_{l j}}q_{l jk}\mathcal{T}_R\nonumber\\
653% &&+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[ \phi^{l }^{k}_{ij}\underline{z}_{ij}+ \varphi^{\varsigma}^{k}_{ij}\overline{z}_{ij} + {\nu_{ij}\left(\pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} \right)} \right]+\tau \ge 1,~\forall {l \in \mathbf{N}} \\
654&& y^{{\rm a}lk}_{jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+{ \pi^{lk}_{ljt}-\varpi^{lk}_{ljt} } = \alpha^{k}_{ljt}+\beta^{k}_{ljt},\nonumber\\ && \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm a}_l, t \in \mathbf{T}_{l j}, k \in [K] \label{con:32} \\
655&& y^{{\rm c}lk}_{jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} = \alpha^{k}_{ljt}+\beta^{k}_{ljt},\nonumber\\ && \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm c}_l, t \in \mathbf{T}_{l j}, k \in [K] \\
656 && (\ref{HP1-ambiguity-LP-FL1}-\ref{HP1-ambiguity-LP-FL1-2}); (\ref{HP2-ambiguity-LP-FL2})-(\ref{HP2-ambiguity-LP-FL})\\
657&& q^{lk}_{jt}\le y^{{\rm a}lk}_{jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
658&& y^{{\rm a}lk}_{jt} \ge M x_{l jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
659&& y^{{\rm a}lk}_{jt} \le q^{lk}_{jt}+(x_{l jt}-1)M, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t \in \mathbf{T}_{l j}, k \in [K]\\
660&& \sum\limits_{j \in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{l j}}q^{lk}_{jt}\le y^{{\rm c}lk}_{jt}, ~\forall l \in \mathbf{N},j \in \mathbf{M}^{\rm c}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
661&& y^{{\rm c}lk}_{jt} \ge M x_{l jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
662&& y^{{\rm c}lk}_{jt} \le \sum\limits_{j \in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{l j}}q^{lk}_{jt}+(x_{l jt}-1)M, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t \in \mathbf{T}_{l j}, k \in [K]\\
663&& \y^{\rm a}, \y^{\rm c} \le \mathbf{0}, \x\in \{0,1\}^{H}, \label{HP1-ambiguity-MILP-FL2}
664\end{eqnarray}
665where $\halpha, \hbeta, \hlambda, \htau, \q, \hpsi, \hphi, \hvarphi, \hpi, \hvarpi, \hrho, \hvarrho, \hvarsigma, \hgamma, \htheta, \y^{\rm a}$ and $\y^{\rm c}$ are auxiliary variables and $M$ is a sufficiently small negative number.
666\end{proposition}
667%\begin{proof}
668{\bf Proof. }
669In the proof of Proposition~\ref{P-proposition1b}, the feasible set $\mathcal{Z}_k\cap \overline{\mathcal{Z}}_{l }(\x)$ of the minimization problem
670$$
671\min\limits_{\bm{z}} \displaystyle \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| \lambda^{k}_{ij}\right]
672$$
is bounded. Assuming that it is nonempty, the lifted equivalent form of the inner minimization problem in (\ref{H-system1}) is also bounded and nonempty. Therefore, the dual variables $q^{lk}_{jt}$ are also bounded. Hence, we can linearize the bilinear terms by introducing new variables $y^{{\rm a}lk}_{jt}$ and $y^{{\rm c}lk}_{jt}$, such that
674\begin{eqnarray}
675&& q^{lk}_{jt}\le y^{{\rm a}lk}_{jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \nonumber\\
676&& y^{{\rm a}lk}_{jt} \ge M x_{l jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \nonumber\\
677&& y^{{\rm a}lk}_{jt} \le q^{lk}_{jt}+(x_{l jt}-1)M, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t \in \mathbf{T}_{l j}, k \in [K]\nonumber\\
&& \sum\limits_{j \in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{l j}}q^{lk}_{jt}\le y^{{\rm c}lk}_{jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \nonumber\\
679&& y^{{\rm c}lk}_{jt} \ge M x_{l jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \nonumber\\
680&& y^{{\rm c}lk}_{jt} \le \sum\limits_{j \in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{l j}}q^{lk}_{jt}+(x_{l jt}-1)M, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t \in \mathbf{T}_{l j}, k \in [K],\nonumber
681\end{eqnarray}
682where $M$ is a sufficiently small negative number (in numerical computation, M can be set to a negative number with very large absolute value). Using this linearization technique, we can arrive at the MILP reformulation \eqref{HP1-ambiguity-MILP-FL1}-\eqref{HP1-ambiguity-MILP-FL2} for the DRRO model (\ref{HP1-ambiguity-X}). \blot
683
684}
685
686
687\subsection{Learning the clustering parameter $K$ by Cross Validation}\label{subsec:CV}{\color{blue}
688It is known that the clustering approaches ({e.g.}, K-means) in general belong to the {\it unsupervised learning} methods (Hastie~et~al.~2009), for which there is no labeled loss function for validating the parameter $K$\footnotemark \footnotetext{One of the popular methods to choose $K$ is the elbow method ({\color{red}Thorndike 1953, Ketchen et al. 1996}), in which the ratio of in-class variance and total variance is plotted, and optimal $K$ is chosen by observing the plot, which apparently depends partially on subjective understanding of the data.}.
689
Interestingly, our distributionally robust redundancy optimization framework with lifetime pattern clustering (of hyper-parameter $K$) can essentially be treated as {\it supervised learning} from a statistical learning perspective ({\color{red} Friedman et al. 2001, James et al. 2013}). In particular, the proposed optimization framework provides two labels, namely (i) the redundancy cost and (ii) the feasibility of the reliability requirement, in the DRRO model~\eqref{HP1-ambiguity-X}. This important feature of our framework enables us to design a {\it Cross Validation} procedure to choose the {\it best} parameter $K$. Specifically, in chance constrained optimization the objective (redundancy cost) and the feasibility of the constraint (reliability) need to be balanced, and this balance reflects the user's design preference between the two. This motivates us to introduce a preference level $\lambda$ into the Cross Validation. For each choice of $K$, we consider the number of constraint violations in cross validation, as well as the average design cost. We normalize both into the range $[0, 1]$ separately, and combine them linearly with the coefficient $\lambda$. A low $\lambda$ means that the designer cares more about low cost than about robustness (reliability); a high $\lambda$ indicates the contrary. By applying this metric, cross validation helps choose a $K$ that balances cost and robustness as necessary. The procedure is summarized in Algorithm 1 below.
691
692\noindent\rule{\textwidth}{0.1em}\vspace{-5pt}\\
693\noindent {\bf Algorithm 1. Cross Validation for Selecting $K$} \vspace{-10pt}\\
694\noindent\rule{\textwidth}{0.05em} \\
695{\bf Input:} Data sample $\mathcal{D}$, preference level $\lambda$ and the set of possible candidates for $K$: $\{K_l, l \in [L]\}$.
696
697\begin{enumerate}
\item Split $\mathcal{D}$ into $M$ disjoint subsets $\mathcal{D}_m, m \in [M]$, each of equal size.
699\item For each $K_l, l \in [L]$:
700\begin{enumerate}
 \item Training: For each subset $\mathcal{D}_m, m \in [M]$, compute the optimal solution $\x^{*}(\mathcal{D}_m, K_l)$ and the associated optimal cost $c(\mathcal{D}_m, K_l)$ by solving the model, using $\mathcal{D} \setminus \mathcal{D}_m$ as input data and splitting it into $K_l$ clusters.
702 \item Validation: For each $m \in [M]$: For $\mathcal{D}_m$, use it as validation set to compute $R(\x^{*}(\mathcal{D}_m, K_l))$, the reliability level under design $\x^{*}(\mathcal{D}_m, K_l)$. If $R(\x^{*}(\mathcal{D}_m, K_l)) < R_0$, count it as one instance of violation. Sum up the total number of violations across all $m \in [M]$, that is, $$v(K_l) := \sum\limits_{m \in [M]}\mathbb{I}\Big[ R(\x^{*}(\mathcal{D}_m, K_l)) < R_0 \Big],$$
 where $\mathbb{I}[\cdot]$ is the indicator function. Compute the average cost as $$\overline{c}(K_l) := \frac{1}{M}\sum\limits_{m \in [M]}c(\mathcal{D}_m, K_l).$$
704 \item Compute the combined metric $\lambda{v( K_l)} + (1-\lambda)\overline{c}(K_l)$. Let $$K_{\rm opt} := \underset{K_l}{\mathrm{arg\,min}}\,\lambda{v(K_l)} + (1-\lambda)\overline{c}(K_l).$$
705\end{enumerate}
706\end{enumerate}
707\noindent{\bf Output:} The optimal value $K_{\rm opt}$ for parameter $K$.
708
709\vspace{-5pt}
710\noindent\rule{\textwidth}{0.1em}\vspace{-17pt}\\
711
712}
713\section{A Supergradient-based Decomposition Algorithm}{\color{blue}
Although the linearized MIP formulation is tractable, when $K$ is large it may take a long time to solve due to the large number of constraints. In this section we provide an iterative algorithm that exploits the structure of the problem and decomposes it into subproblems that can be solved in parallel.
715
We first introduce $\y^a$ and $\y^c$ to separate $\q$ and $\x$ in \eqref{HP1-ambiguity-LP-FL}, by the same approach as in the proof of Proposition~\ref{proposition1b}. Then, computing its dual,
717we obtain the following formulation:
718
719\begin{eqnarray} \label{dual}
720&\!\!\!\!\!\! \min &
721\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\gamma^{lk}_{jt}+\delta^{lk}_{jt})Mx_{ljt} +
722\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\lambda^{lk}_{jt}+\tau^{lk}_{jt})Mx_{ljt} \nonumber\\
723&& + \sum\limits_{l\in \mathbf{N}}\sum\limits_{k\in[K]}{p_k}{\alpha^{l}_k} -
724\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\delta^{lk}_{jt}M -
725\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\tau^{lk}_{jt}M \\
726&\!\!\!\!\!\!{\rm s.t.} & \rho^{ik}_{jt}+\sum\limits_{l \in \mathbf{N} \setminus \{i\}}\phi^{lk}_{ijt} + \varphi^{k}_{ijt} \ge \underline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}^{a}_i, t\in \mathbf{T}_{ij}, k\in[K]\label{model-c-1}\\
727&& \varrho^{ik}_{jt}+\sum\limits_{l \in \mathbf{N} \setminus \{i\}}\phi^{lk}_{ijt} + \varphi^{k}_{ijt} \ge \underline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}^{c}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
728&& \rho^{ik}_{jt}+\sum\limits_{l \in \mathbf{N} \setminus \{i\}}\phi^{lk}_{ijt} + \varphi^{k}_{ijt} \le \overline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}^{a}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
729&& \varrho^{ik}_{jt}+\sum\limits_{l \in \mathbf{N} \setminus \{i\}}\phi^{lk}_{ijt} + \varphi^{k}_{ijt} \le \overline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}^{c}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
730&& \sum\limits_{l \in \mathbf{N}}\sum\limits_{t\in \mathbf{T}_{ij}}\pi^{lk}_{ijt}+\sum\limits_{t\in \mathbf{T}_{ij}}\varpi^{k}_{ijt} \le \epsilon^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in[K]\\
731&& \sum\limits_{i \in \mathbf{N}}\alpha_{ik} + \beta_{k} = -1, \forall k \in [K]\\
732&& \rho^{ik}_{jt}-\theta^{ik}_{jt}+\gamma^{ik}_{jt}+\delta^{ik}_{jt}\le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}^{a}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
733&& \varrho^{ik}_{jt}-\vartheta^{ik}_{jt}+\lambda^{ik}_{jt}+\tau^{ik}_{jt}\le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}^{c}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
734&& \mathcal{T}_R\alpha_{ik} + \theta^{ik}_{jt} - \delta^{ik}_{jt} + \sum\limits_{j \in \mathbf{M}^c_i}\sum\limits_{t \in \mathbf{T}_{ij}}(\vartheta^{ik}_{jt} - \tau^{ik}_{jt}) \le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}^{a}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
735&& \underline{z}_{lj}\alpha_{lk} + \rho^{lk}_{jt} \ge 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^a_l, t\in \mathbf{T}_{lj}, k\in[K]\\
736&& \underline{z}_{lj}\alpha_{lk} + \varrho^{lk}_{jt} \ge 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^c_l, t\in \mathbf{T}_{lj}, k\in[K]\\
737&& \underline{z}_{ij}\alpha_{lk} + \phi^{lk}_{ijt} \ge 0, \forall l \in \mathbf{N}, i \in \mathbf{N} \setminus \{l\}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
738&& \overline{z}_{lj}\alpha_{lk} + \rho^{lk}_{jt} \le 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^a_l, t\in \mathbf{T}_{lj}, k\in[K]\\
739&& \overline{z}_{lj}\alpha_{lk} + \varrho^{lk}_{jt} \le 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^c_l, t\in \mathbf{T}_{lj}, k\in[K]\\
740&& \overline{z}_{ij}\alpha_{lk} + \phi^{lk}_{ijt} \le 0, \forall l \in \mathbf{N}, i \in \mathbf{N} \setminus \{l\}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
741&& \sum\limits_{i \in \mathbf{N}}\sum\limits_{j \in \mathbf{M}_i}\sum\limits_{t \in \mathbf{T}_{ij}}\Big[\Big((\mu^n_{ijt})^2-(\mu^k_{ijt})^2\Big)\alpha_{lk}\Big] + \sum\limits_{j \in \mathbf{M}^a_l}\sum\limits_{t \in \mathbf{T}_{lj}}2(\mu^n_{ljt}-\mu^k_{ljt})\rho^{lk}_{jt} + \sum\limits_{j \in \mathbf{M}^c_l}\sum\limits_{t \in \mathbf{T}_{lj}}2(\mu^n_{ljt}-\mu^k_{ljt})\varrho^{lk}_{jt}\nonumber \\&& + \sum\limits_{i \in \mathbf{N} \setminus \{l\}}\sum\limits_{j \in \mathbf{M}_i}\sum\limits_{t \in \mathbf{T}_{ij}}2(\mu^n_{ijt}-\mu^k_{ijt})\phi^{lk}_{ijt} \le 0 ,\forall l \in \mathbf{N}, n \in [K], k \in [K]\\
742&& \nu^{k}_{lj}\alpha_{lk} + \rho^{lk}_{jt} + \pi^{lk}_{ljt} \ge 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^a_l, t\in \mathbf{T}_{lj}, k\in[K]\\
743&& \nu^{k}_{lj}\alpha_{lk} + \varrho^{lk}_{jt} + \pi^{lk}_{ljt} \ge 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^c_l, t\in \mathbf{T}_{lj}, k\in[K]\\
744&& \nu^{k}_{ij}\alpha_{lk} + \phi^{lk}_{ijt} + \pi^{lk}_{ijt} \ge 0, \forall l \in \mathbf{N}, i \in \mathbf{N} \setminus \{l\}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
745&& \nu^{k}_{lj}\alpha_{lk} + \rho^{lk}_{jt} - \pi^{lk}_{ljt} \le 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^a_l, t\in \mathbf{T}_{lj}, k\in[K]\\
746&& \nu^{k}_{lj}\alpha_{lk} + \varrho^{lk}_{jt} - \pi^{lk}_{ljt} \le 0, \forall l \in \mathbf{N}, j\in \mathbf{M}^c_l, t\in \mathbf{T}_{lj}, k\in[K]\\
747&& \nu^{k}_{ij}\alpha_{lk} + \phi^{lk}_{ijt} - \pi^{lk}_{ijt} \le 0, \forall l \in \mathbf{N}, i \in \mathbf{N} \setminus \{l\}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
748&& \underline{z}_{ij}\beta_k + \varphi^{k}_{ijt} \ge 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
749&& \overline{z}_{ij}\beta_k + \varphi^{k}_{ijt} \le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
750&& \sum\limits_{i \in \mathbf{N}}\sum\limits_{j \in \mathbf{M}_i}\sum\limits_{t \in \mathbf{T}_{ij}}\Big[\Big((\mu^n_{ijt})^2-(\mu^k_{ijt})^2\Big)\beta_k + 2(\mu^n_{ijt}-\mu^k_{ijt})\varphi^{k}_{ijt}\Big] \le 0 ,\forall n \in [K], k \in [K]\\
751&& \nu^{k}_{ij}\beta_k + \varphi^{k}_{ijt} + \varpi^{k}_{ijt} \ge 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
752&& \nu^{k}_{ij}\beta_k + \varphi^{k}_{ijt} - \varpi^{k}_{ijt} \le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in[K]\\
753&& \halpha,\hbeta, \hgamma, \hlambda \le \mathbf{0}, \\
754&& \htheta, \hdelta, \hvartheta, \htau \ge \mathbf{0}, \label{model:dual-S}
755\end{eqnarray}
756where $({\halpha}, {\hbeta}, {\hphi}, {\hpi}, {\hvarphi}, {\hvarpi}, {\hrho}, {\hvarrho}, {\htheta}, {\hgamma}, {\hdelta}, {\hvartheta}, {\hlambda}, {\htau})$ are auxiliary variables.
757
Let $D(\x)$ denote the optimal value of the above dual program \eqref{dual}--\eqref{model:dual-S} for a given $\x$; it is noted that computing $D(\x)$ amounts to solving a linear program. Furthermore, this linear program can be decomposed into $K$ subproblems by splitting the constraints and the objective according to $k \in [K]$. Therefore, $D(\x)$ can be computed efficiently even if $K$ is very large.
759
760%\begin{eqnarray}
761%&\!\!\!\!\!\! D_k(\x) = \min & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij} + \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right) + \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\epsilon^{k}_{ij}\lambda^{k}_{ij} - \tau_{k} \\
762% &\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \Big[\phi^{lk}_{ijt}\underline{z}_{ij}+\varphi^{lk}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} \right)} \nonumber\\
763% &&+ \sum\limits_{n \in [K]}\psi^{lk}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2)\Big] + \sum\limits_{j\in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{lj}} q^{lk}_{jt}\mathcal{T}_R+ \tau_k \geq p_k, \forall l \in \mathbf{N}\\
764% && \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \Big[\rho^{k}_{ijt}\underline{z}_{ij}+\varrho^{k}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\gamma^{k}_{ijt}-\theta^{k}_{ijt} \right)} \nonumber\\ && + \sum\limits_{n \in [K]}\varsigma^{k}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2) \Big] + \tau_k \geq0\\
765%&& {q^{lk}_{jt}}x_{l jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} \nonumber \\ && = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm a}_l, t \in \mathbf{T}_{l j} \\[0.3 cm]
766%&& x_{l jt}\sum\limits_{j \in \mathbf{M}^{\rm a}_l}\sum\limits_{t \in \mathbf{T}_{l j}}{q^{lk}_{jt}} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} \nonumber \\ && = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm c}_l, t \in \mathbf{T}_{l j} \\[0.3 cm]
767%&& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\psi^{lk}_{n}+ \phi^{lk}_{ijt}+\varphi^{lk}_{ijt}+\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt},\nonumber\\ && ~ \forall l \in \mathbf{N}, i \in \mathbf{N}\setminus\{l \}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\
768%&&{|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\pi^{lk}_{ijt}+\varpi^{lk}_{ijt}) =\lambda^{k}_{ij}, ~ \forall l \in \mathbf{N}, i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\
769%&& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\varsigma^{k}_{n}+ \rho^{k}_{ijt}+\varrho^{k}_{ijt}+\gamma^{k}_{ijt}-\theta^{k}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt},\nonumber\\ && ~ \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\
770%&& {|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\gamma^{k}_{ijt}+\theta^{k}_{ijt}) =\lambda^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\
771%%&& q_{l jk}\le y_{{l jk}}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
772%%&& y_{{l jk}} \ge M x_{l jk}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
773%%&& y_{{l jk}} \le q_{l jk }+(x_{l jk}-1)M, ~\forall l \in \mathbf{N}, j \in \mathcal{J}({l}), k \in \mathcal{N}(l,j)\\[0.3 cm]
774%%&& \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\ge L_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i \\
775%&& \halpha,\q, \hpsi, \hvarphi, \hvarsigma, \hvarrho \le \mathbf{0}, \htau_{k} \in \mathbb{R} \\
776%&& \hbeta, \hlambda, \hphi, \hrho, \hpi,\hvarpi, \hgamma, \htheta \ge \mathbf{0}
777%\end{eqnarray}. \fi
778
779Now the problem \eqref{HP1-ambiguity-X} can be rewritten as
780\begin{equation}\label{HP1-ambiguity-XI}
781\begin{array}{rcll}
782& \min\limits_{\x} & C(\x)=\sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[ \sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\right]c_{ij} \\[0.5 cm]
783&{\rm s.t.} & D(\x) \ge R_{0} & \\[0.3 cm]
784&& L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\le U_{i}, \forall i \in \mathbf{N}\\[0.3 cm]
785&& \x \in \{0,1\}^H
786\end{array}
787\end{equation}
788which is termed the {\it master} problem.
789
790
 We now approach this master problem \eqref{HP1-ambiguity-XI} by iteratively solving a sequence of {\it supergradient}-based relaxed problems to approximate the optimal solution. If the optimal solution of a relaxed problem is feasible in the original problem, then it is also optimal in the original problem. Otherwise, we add more constraints to the relaxed problem. We repeat this process until either the problem becomes infeasible, or an optimal solution is obtained.
792
793
 It is noted that $D(\cdot)$ is a concave function, which always has a {\it supergradient}. In particular, we denote by $\s(\x)$ a {\it supergradient} of $D(\x)$ at $\x$, which by definition means that $D(\x) \le D(\y) + \s(\y)^{\top}(\x-\y), \forall \y \in S$. Note that when $\x = \y$, the two sides are equal. With supergradient $\s(\x)$, we can formulate the {\it relaxed master problem} for \eqref{HP1-ambiguity-XI} as
795\begin{equation}\label{HP1-ambiguity-XIII}
796\begin{array}{rcll}
797& \min\limits_{\x} & C(\x)=\sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[ \sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\right]c_{ij} \\[0.5 cm]
&{\rm s.t.} & D(\y) + \s(\y)^{\top}(\x-\y) \ge R_{0}, & \forall \y \in \mathcal{Y} \subset \{0,1\}^H \\[0.3 cm]
799&& L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\le U_{i}, & \forall i \in \mathbf{N}\\[0.3 cm]
800&& \x \in \{0,1\}^H.
801\end{array}
802\end{equation}
803
804
805The following lemma computes the supergradient $\s(\x)$ of $D(\x)$.
806\begin{lemma}
Given any feasible $\x$, suppose that $(\overline{\halpha}, \overline{\hbeta}, \overline{\hphi}, \overline{\hpi}, \overline{\hvarphi}, \overline{\hvarpi}, \overline{\hrho}, \overline{\hvarrho}, \overline{\htheta}, \overline{\hgamma}, \overline{\hdelta}, \overline{\hvartheta}, \overline{\hlambda}, \overline{\htau})$ is the optimal solution of the dual problem \eqref{dual}-\eqref{model:dual-S} given $\x$. Let
808$$\s(\x)=\Big(s_{ijt}(\x)\Big)_{i\in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}}$$
809with
810\begin{equation}
811s_{ijt}(\x) := \left\{
812 \begin{array}{ll}
813 \sum\limits_{k\in[K]}\left(\overline{\gamma}^{lk}_{jt}+\overline{\delta}^{lk}_{jt}\right)M, \forall l \in \mathbf{N}, j \in \mathbf{M}^a_l, t \in \mathbf{T}_{lj}\\
814 \sum\limits_{k\in[K]}\left(\overline{\lambda}^{lk}_{jt}+\overline{\tau}^{lk}_{jt}\right)M, \forall l \in \mathbf{N}, j \in \mathbf{M}^c_l, t \in \mathbf{T}_{lj}\end{array}\right.,
815\end{equation}
where we define $\overline{\hgamma}^{l}_j = \overline{\hdelta}^{l}_j = \mathbf{0}, \forall j \in \mathbf{M}_{l}^{c}$ and $\overline{\hlambda}^{l}_j = \overline{\htau}^{l}_j = \mathbf{0}, \forall j \in \mathbf{M}_{l}^{a}$. Then $\s(\x)$ is a supergradient of $D(\x)$ at $\x$.
817\end{lemma}
818
819{\bf Proof. }
Since $D(\x)$ is concave, proving that $\s(\x)$ is a supergradient of $D(\x)$ at $\x$ is equivalent to proving the following inequality:
821\begin{equation}\label{subgrad}
D(\hat{\x}) \le D(\x) + \s(\x)^{\top}(\hat{\x}-\x), \forall \hat{\x} \in \mathcal{X},
823\end{equation}
824where $$\mathcal{X}:= \left\{\x \in \{0,1\}^H: L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\le U_{i}, \forall i \in \mathbf{N}\right\}.$$
825
826
827After obtaining $(\overline{\halpha}, \overline{\hbeta}, \overline{\hphi}, \overline{\hpi}, \overline{\hvarphi}, \overline{\hvarpi}, \overline{\hrho}, \overline{\hvarrho}, \overline{\htheta}, \overline{\hgamma}, \overline{\hdelta}, \overline{\hvartheta}, \overline{\hlambda}, \overline{\htau})$ as the optimal solution for given $\x$, we can rewrite the right side of \eqref{subgrad} as:
828\begin{eqnarray}
829&& \sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\overline{\gamma}^{lk}_{jt}+\overline{\delta}^{lk}_{jt})M\hat{x}_{ljt} +
830\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\overline{\lambda}^{lk}_{jt}+\overline{\tau}^{lk}_{jt})M\hat{x}_{ljt} \nonumber\\
831& +& \sum\limits_{l\in \mathbf{N}}\sum\limits_{k\in[K]}{p_k}{\overline{\alpha}^{l}_k} -
832\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\overline{\delta}^{lk}_{jt}M -
833\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\overline{\tau}^{lk}_{jt}M,
834\end{eqnarray}
835and the left side as
836\begin{eqnarray*}
837&\!\!\!\!\!\! \min &
838\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\gamma^{lk}_{jt}+\delta^{lk}_{jt})M\hat{x}_{ljt} +
839\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\lambda^{lk}_{jt}+\tau^{lk}_{jt})M\hat{x}_{ljt} \nonumber\\
840&& + \sum\limits_{l\in \mathbf{N}}\sum\limits_{k\in[K]}{p_k}{\alpha^{l}_k} -
841\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\delta^{lk}_{jt}M -
842\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\tau^{lk}_{jt}M\\
843&\!\!\!\!\!\!{\rm s.t.} & \eqref{model-c-1}-\eqref{model:dual-S}.
844\end{eqnarray*}
845
Since the constraints are independent of $\hat{\x}$, $(\overline{\halpha}, \overline{\hbeta}, \overline{\hphi}, \overline{\hpi}, \overline{\hvarphi}, \overline{\hvarpi}, \overline{\hrho}, \overline{\hvarrho}, \overline{\htheta}, \overline{\hgamma}, \overline{\hdelta}, \overline{\hvartheta}, \overline{\hlambda}, \overline{\htau})$ is also a feasible solution of the minimization problem defining $D(\hat{\x})$. Since $D(\hat{\x})$ is the optimal value of this minimization problem, we have
848\begin{eqnarray*}
849D(\hat{\x})&\le& \sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\overline{\gamma}^{lk}_{jt}+\overline{\delta}^{lk}_{jt})M\hat{x}_{ljt} +
850\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}(\overline{\lambda}^{lk}_{jt}+\overline{\tau}^{lk}_{jt})M\hat{x}_{ljt} \nonumber\\
851&+& \sum\limits_{l\in \mathbf{N}}\sum\limits_{k\in[K]}{p_k}{\overline{\alpha}^{l}_k} -
852\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{a}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\overline{\delta}^{lk}_{jt}M -
853\sum\limits_{l\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}^{c}_l} \sum\limits_{t\in \mathbf{T}_{lj}}\sum\limits_{k\in[K]}\overline{\tau}^{lk}_{jt}M, \forall \hat{\x} \in \mathcal{X}.
854\end{eqnarray*}
855
This inequality is exactly \eqref{subgrad}, so we have proved that $\s(\x)$ is a supergradient of $D(\x)$ at $\x$. \blot\\
857
858%Furthermore, if we compute $D(\y^m)$ and the supergradient $\s(\y^m)$ with $M$ points $\y^m \in S, \forall m \in [M](M \le |S|)$, we form the following relaxed problem:
859%\begin{equation}\label{HP1-ambiguity-XIII}
860%\begin{array}{rcll}
861%& \min\limits_{\x} & C(\x) \\[0.5 cm]
862%&{\rm s.t.} & D(\y^m) + s(\y^m)^{\top}(\x-\y^m) \ge R_{0}, & \forall m \in [M]\\[0.3 cm]
863%&& \x \in S.
864%\end{array}
865%\end{equation}
866
867It is noted that if the optimal solution to the relaxed problem \eqref{HP1-ambiguity-XIII} is feasible in the original problem, it is also the optimal solution of the original problem. We present the overall algorithm below:
868
869\noindent\rule{\textwidth}{0.1em}\vspace{-5pt}\\
870\noindent {\bf Algorithm 2.} Supergradient-based iterative optimization framework for problem \eqref{HP1-ambiguity-XI}\vspace{-10pt}\\
871\noindent\rule{\textwidth}{0.05em} \\
{\bf Input:} $\y^0 \in S$, $m=M=0$, $LB=-\infty$, $UB=\infty$. \\
873{\bf Output:} Design $\x^{*}$ and associated cost.
874
875\begin{enumerate}
876\item Compute $D(\y^m)$;
877\item Compute supergradient $\s(\y^m)$ at $\y^m$, and solve \eqref{HP1-ambiguity-XIII}: if it is infeasible, STOP and declare the overall problem infeasible; otherwise, obtain $\x^{*}$ as the optimal solution;
\item If $D(\x^*) \ge R_0$, go to STEP 4; else, let $m = m + 1$, $M = M + 1$, $\y^m = \x^{*}$ and go to STEP 1;
879\item {\bf return} Design $\x^{*}$ and associated cost $C(\x^{*})$.
880\end{enumerate}
881
882\vspace{-5pt}
883\noindent\rule{\textwidth}{0.1em}\vspace{-17pt}\\
884}
885\iffalse
Note that the algorithm above produces an exact solution to the problem. By relaxing the condition $UB \le LB$ in STEP 3 to $UB - LB \le \epsilon$ for some $\epsilon > 0$, we can also get an algorithm that produces an approximate solution to the problem, potentially with much less time.
887\fi
888
889
890
891
892
893%\section{Computational Study}
894%In this section we present numerical experiments of our model, as well as a case study. The computational study consists of six parts: (i) visualizing the result of dimension reduction and clustering; (ii) testing how design changes when parameters vary, including $K$ and other hyperparameters; (iii) choosing best parameter $K$ by cross validation; (iv) comparing our design with a baseline probabilistic model; (v) experimenting on the value of side information and (vi) a real-life case. The distribution used in experiment (i)-(v) is generated by a real data set.
895%All computational result were done on a PC with Intel(R) Core(TM) i7-7500U CPU at 2.7 GHz, coded in Python. The MIP models were solved by library called Gurobi, version 8.1.1.
896%
897%\subsection{Visualizing clusters}
898%In this section we present a visualization of dimension reduction and clustering. We first apply tSNE algorithm, a state-of-art algorithm for dimension reduction and visualization, \iffalse (refer to Maaten L, Hinton G. Visualizing data using t-SNE[J]. Journal of machine learning research, 2008, 9(Nov): 2579-2605.)\fi to reduce the lifetime data from 28 dimensions to 2 dimensions, and then perform K-means clustering. We present figures of different choice of number of clusters, $K=2$ and $K=5$, respectively. As we can see in the figure, the original multi-dimensional data can be well clustered after dimension reduction. The clustering results, including results from other choices of $K$, are used in %following experiments.
899%
900%\begin{figure}[H]
901%\centering
902%%\includegraphics{TSNE_1.png}
903%\includegraphics[width=\columnwidth]{2D_tsne.jpg}
904%\caption{\footnotesize Visualization of dimension reduction by tSNE algorithm and clustering by K-means algorithms, into 2 clusters on the left and 5 clusters on the right. Note that the two figures are produced by different data.}
905%\label{figure_TSNE_1}
906%\end{figure}
907%
908%\subsection{System design variation with different parameters}
909%
910%\begin{figure}[H]
911%\centering
912%\includegraphics[scale=0.8]{KVARIATION11.pdf}
913%\caption{ The multi-type series-parallel system we are experimenting in this section. It consists of 3 subsystems, with 1 type of active parallel component and 2 types of cold-standby component in each subsystem. Each type of component can have up to 3 redundancies.}
914%\label{figure1}
915%\end{figure}
916%
917%In this subsection we experiment on adjusting parameters of the model and observe how system design $x$ changed accordingly. we consider the following setting: a series-parallel system with subsystems ($|\mathbf{N}|=3$), with each subsystem containing 3 types of components ($|\mathbf{M}_i|= 3, \forall i \in \mathbf{N}$), among which 1 types are active-parallel and 2 types are cold-standby, and each type is of 3 redundant components ($|\mathbf{T}_{ij}|=3, \forall i \in \mathbf{N}, j\in \mathbf{M}_i$). The lifetime requirement $\mathcal{T}_R = 29$. The parameters we are adjusting includes $K \in [1, 10], R_0 \in \{0.95, 0.97, 0.99\}$, $\epsilon_{ij} \in \{0.05, 0.075, 0.1\},\forall i \in [3], j \in [3]$, and $\Delta$ to adjust $\underline{\hmu}$ and $\overline{\hmu}$:.
918%$$
919%\underline{\mu}^{k}_{ij}={\nu}^{k}_{ij}-\Delta,
920%\quad \overline{\mu}^{k}_{ij}={\nu}^{k}_{ij}+\Delta,
921%$$
922%where $\Delta \in \{0.025, 0.05, 0.075, 0.1\}$. Therefore, by changing the values of $\Delta$, we can have different sets of parameters $\underline{\hmu}$ and $\overline{\hmu}$. Specifically, large values of $\Delta$ correspond to the large gaps between $\underline{\hmu}$ and $\overline{\hmu}$.
923%
924%\begin{table}[!htbp]\label{d-table}\scriptsize%\footnotesize%\small%
925%\caption{ \footnotesize The design table for different $K$ under $\mathcal{T}_R = 29$, $\epsilon = 0.05$. AP, CS-I and CS-II represents the active parallel type, cold-standby type 1 and cold-standby type 2, respectively.}
926%\begin{center}
927%\begin{adjustbox}{angle=270}
928%\scalebox{1}{
929%\begin{tabular}{|c|c|c| c|| ccccc|| ccccc|| ccccc||}\hline
930% \multirow{2}{*}{$\epsilon$} & \multirow{2}{*}{$\Delta$} & \multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{5}{c||}{$R_0 = 0.95, K$} & \multicolumn{5}{c||}{$R_0 = 0.97, K$} & \multicolumn{5}{c||}{$R_0 = 0.99, K$}\\
931% \cline{5-19}
932% &&&&1 & 3 & 5 & 8 & 10 &1 & 3 & 5 & 8 & 10 & 1 & 3 & 5 & 8 & 10 \\
933% \hline
934% &&& AP & 1 & 1 & 1 &1 & 1 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 &1 & 1\\
935% &&1 & CS-l & 1 & 1 & 1 &1 & 1 & 2 & 2 & 2 & 2& 1 & 2 & 2 & 2 &2 & 2\\
936% &&& CS-ll & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0\\
937% \cline{3-19}
938% &&& AP & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 & 1\\
939% &0.025&2 & CS-l & 0& 0 & 0 & 0 & 1 & 0 & 0 & 0 &0& 0& 0 & 0 & 0 &0 & 0\\
940% &&& CS-ll & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
941% \cline{3-19}
942% &&& AP &0 & 0 & 1& 1 & 1 & 0 &1 & 1 &1 & 1 & 1 &0 &0 &0 & 0\\
943% &&3 & CS-l & 1 & 1& 0& 0 & 0 & 1 & 0 & 0& 0 & 1 & 1 & 1 & 1 &1 & 1\\
944% &&& CS-ll & 3 & 3 & 3 &3 & 3& 3 & 3 & 3& 3 & 3& 3 & 3 & 3 &3 & 3\\
945%\cline{2-19}
946%\multicolumn{1}{|c|}{} & \multicolumn{3}{c||}{Design cost (k\$)} & 39.5& 39.5&\!\! 39 &39& 39 & 44.5&44&44&\!\!44 & 41 &55 & 54.5 &\!\! 54.5 &54.5& 54.5 \\
947% \cline{2-19}
948% &&& AP& 1 & 1 & 1 &1 & 1 & 0 & 0 & 0 &0 & 0 & 1& 1 & 1 & 1 &1\\
949% &&1 & CS-l& 1 & 1 & 1 &1 & 1 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2 \\
950% &&& CS-ll & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0& 0\\
951% \cline{3-19}
952% &&& AP & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 \\
953% &0.05&2 & CS-l &1 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0\\
954% &&& CS-ll & 2 & 2 & 2 &2 & 2 & 2 & 2 &2 &2 &2& 2 & 2 & 2 &2 &2\\
955% \cline{3-19}
956% &&& AP & 1 & 1 & 1 &1 & 0 & 1& 1 & 0& 0 & 0 & 1 &1 & 1 &1 & 1\\
957% &&3 & CS-l & 3 & 3 & 3 &3 & 1 & 3 & 3 & 1& 1 & 1 & 3 & 3 & 3 & 3 & 3\\
958% &&& CS-ll & 1 & 1 & 1 &1 & 3 & 1 & 1& 3& 3 & 3 & 1 & 1 & 1 &1 & 1\\
959%\cline{2-19}
960%\multicolumn{1}{|c|}{0.05} & \multicolumn{3}{c||}{Design cost (k\$)} & 40&40 &\!\! 40 &40& 39.5 & 45&45&\!\! 44.5 & 44.5 & 44.5 & 55&55&\!\! 55 & 55 & 55\\
961% \cline{2-19}
962% &&& AP& 1& 1 & 1 & 1 &1 & 0 & 0 & 0 &0 & 0 & 1& 1 & 1 & 1 &1\\
963% &&1 &CS-l2 & 1& 1 & 1 & 1 &1 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
964% &&& CS-ll & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0 \\
965% \cline{3-19}
966% &&& AP & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 \\
967% &0.075&2 & CS-l & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0 \\
968% &&& CS-ll & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2 \\
969% \cline{3-19}
970% &&& AP & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1\\
971% &&3 & CS-l & 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 &3 & 3& 3 & 3 & 3 &3 & 3\\
972% &&& CS-ll & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1\\
973%\cline{2-19}
974%\multicolumn{1}{|c|}{} & \multicolumn{3}{c||}{Design cost (k\$)} & 40&40&\!\! 40 & 40 & 40 & 45&45&\!\! 45& 45&45 & 55&55 &\!\! 55 & 55& 55\\
975%
976% \cline{2-19}
977% &&& AP& 1& 1 & 1 & 1 &1 & 0 & 0 & 0 &0 & 0 & 1& 1 & 1 & 1 &1\\
978%
979% &&1 & CS-l & 1& 1 & 1 & 1 &1 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
980% &&&CS-ll3 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0\\
981% \cline{3-19}
982% &&& AP & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 \\
983% &0.1&2 & CS-l & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0 \\
984% &&& CS-ll & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
985% \cline{3-19}
986% &&& AP &1 & 1& 1 & 1 & 1 & 1 & 1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 \\
987% &&3 & CS-l & 3 & 3 & 3 &3 & 3 & 3 & 3 & 3 &3 & 3& 3 & 3 & 3 &3 & 3\\
988% &&& CS-ll& 1& 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1& 1 & 1 & 1 & 1 \\
989%\cline{2-19}
990%\multicolumn{1}{|c|}{} & \multicolumn{3}{c||}{Design cost (k\$)} & 40&\!\! 40 & 40&40 & 40 & 45&45&\!\! 45 &45& 45 & 55&55&\!\! 55 & 55 & 55 \\
991%\hline
992%\end{tabular}}
993%\end{adjustbox}
994%\end{center}
995%\end{table}
996%
997%The resulting system design in the case of $\epsilon_{ij} = 0.05$ are shown in the table. We leave the rest of experiment results in the Electronic Companion. Note that when $K = 1$, the model reduces to the case of robust model without clustering by Wang, et. al.
998%
999%The observations of the experimental results are the following:
1000%(i) the cost increases when the variation range $[\underline{\hmu}, \overline{\hmu}]$ of expected lifetimes increases, or the dispersion parameter $\epsilon$ of the lifetimes of components enlarges. Such increased cost is due to the enlarged ambiguity set $\mathbb{F}_K$ resulting from the change of above distributional parameters $([\underline{\hmu}, \overline{\hmu}]$. (ii) Intuitively, cost also increases if the required reliability level $R_0$ is increased.
1001%(iii) the cost decreases as number of clusters $K$ increases. This is due to to the fact that since the mean and dispersion information of each cluster are included in the ambiguity set, more clusters implies more information and therefore smaller ambiguity set.
1002%
1003%We then perform out-of-sample tests on the designs $K = 1, 4, 8$. To test the robustness of the design, we generate testing data with smaller lifetime mean and much larger standard deviation. In particular, we let $\hmu_{test} = (1 - \Delta_{m})\hmu_{train}$, and $\hsigma_{test} = \Delta_{s}\hsigma_{train}$. $\Delta_m$ is set to 10\%, and $\Delta_s$ is set to 2000\%, 3000\% and 4000\%, respectively.
1004%
1005%\begin{table}[htp]\footnotesize%\small%
1006%\caption{\label{tab-compare2} \footnotesize The out of sample reliability of designs generated by $K = 1, 4, 8$ models under different $\Delta_{s}$. $R_0$ is set to $0.9$. In the design columns, the 3 columns are corresponding to different subsystems. The three numbers in each column indicates the number of redundancies used that is active parallel, cold-standby type 1, or cold-standby type 2, respectively. }
1007%\begin{center}
1008%\begin{tabular}{|c||c|c|| c|c|c| c | c |c |}\hline
1009%\multirow{2}{*}{$\mathcal{T}_R$} & \multirow{2}{*}{$(\Delta_{m}, \Delta_{s})$} & \multirow{2}{*}{Model} & \multicolumn{3}{c|}{Design} & \multirow{2}{*}{cost} & \multirow{2}{*}{Mean of out-of-sample reliability level} & \multirow{2}{*}{StD} \\
1010%\cline{4-6}
1011%&&& 1 & 2 & 3 &&&\\
1012%\hline
1013%\multirow{9}{*}{25} & \multirow{3}{*}{(10\%, 2000\%)} & K=1 Model & (1,1,0) & (1,2,0) & (1,0,3)& 38.5 &0.999 & 0.035 \\
1014% & & K=4 Model & (1,1,0) & (1,2,0) & (0,3,1)& 38.0 &0.966 & 0.182 \\
1015% & & K=8 Model & (1,1,0) & (1,2,0) & (0,0,3)& 37.0 &0.607 & 0.489 \\
1016% \cline{2-9}
1017% &\multirow{3}{*}{(10\%, 3000\%)} & K=1 Model & (1,1,0) & (1,2,0) & (1,0,3)& 38.5 & 0.989 & 0.103 \\
1018% & & K=4 Model & (1,1,0) & (1,2,0) & (0,3,1)& 38.0 &0.925 & 0.263 \\
1019% & & K=8 Model & (1,1,0) & (1,2,0) & (0,0,3)& 37.0 &0.576 & 0.494 \\
1020% \cline{2-9}
1021% &\multirow{3}{*}{(10\%, 4000\%)} & K=1 Model & (1,1,0) & (1,2,0) & (1,0,3)& 38.5 & 0.975 & 0.157 \\
1022% & & K=4 Model & (1,1,0) & (1,2,0) & (0,3,1)& 38.0 &0.894 & 0.308 \\
1023% & & K=8 Model & (1,1,0) & (1,2,0) & (0,0,3)& 37.0 &0.551 & 0.497 \\
1024%\hline
1025%
1026%\end{tabular}
1027%\end{center}
1028%\end{table}
1029%
1030%\begin{figure}[H]
1031%\centering
1032%\includegraphics[width=\columnwidth]{Out_of_sample_K.png}
1033%\caption{\footnotesize Figure (a)(b)(c) represents the out of sample reliability of designs generated by $K = 1, 4, 8$ models under different $\Delta_{s}$, respectively. The vertical beam represents $\mathcal{T}_R$. The fraction of the lifetime histogram on the right side of beam represents the out-of-sample reliability level.}
1034%\label{figureK}
1035%\end{figure}
1036%
1037%From the results, we can observe that with moderate number of clusters ($K = 4$), we can obtain designs with less costs than designs generated by robust model with no clustering ($K = 1$), while retaining robustness even when there is a significant shrink in mean lifetime and much larger standard deviation. This shows that our framework can produce designs that are robust enough and cheaper, by the incorporation of clustering. However, if the number of clusters becomes too large ($K = 8$), the resulting model lost robustness in these extreme tests. Thus, it is crucial to choose the optimal $K$. We present an experiment of choosing $K$ by cross validation in the next subsection.
1038%
1039%\subsection{Choosing $K$ by cross validation}
1040%In this subsection, we present a cross validation experiment by applying Algorithm 2. In particular, we choose $m = 10$ and do a 10-fold cross validation. The number of instances of constraints violation, as well as the cost of designs, are plotted in the figure below on the left. We also offers a combined metric for cross validation. First, the cost and number of validation are both normalized to the range $[0, 1]$. Then, compute $(1-\lambda)cost(K) + \lambda{{\#}violation(K)}$, where $\lambda \in [0,1]$. By assigning different $\lambda$, we can adapt to scenarios in of different cost-violation tradeoffs. In particular, high $\lambda$ means that robustness of the design is more valued than the cost; low $\lambda$ indicates the contrary. The combined metric under different $\lambda$ are plotted in the figure below on the right. Observe that when $\lambda$ is low, large $K$ such as 7 and 9 are preferred; when $\lambda$ is high, moderate $K$ like 5 are better. Since large $K$ generally correspond to less cost, this result matches the intuition that people are willing to pay more cost if robustness is more valued.
1041% %least constraint violation occurs, so $K = 5$ is the ideal parameter to cluster this data set. $K = 5$ will be used in the following subsections.
1042%\iffalse
1043%\begin{figure}[H]
1044%\begin{subfigure}{0.5\textwidth}
1045%\includegraphics[scale=0.65]{cross_validation.png}
1046%\caption{\footnotesize}
1047%\label{figure4-1}
1048%\end{subfigure}
1049%\begin{subfigure}{0.5\textwidth}
1050%\centering
1051%\includegraphics[scale=0.37]{cross_validation_lambda.png}
1052%\caption{\footnotesize }
1053%\label{figure4-2}
1054%\end{subfigure}
1055%\caption{\footnotesize (a) The number of violations and costs with different $K$. (b) Costs penalized by $\lambda$ with different $K$, with each line associated with a different $\lambda$.}
1056%\end{figure}
1057%\fi
1058%
1059%\begin{figure}[H]
1060%\centering
1061%\includegraphics[width=\columnwidth]{cv.jpg}
1062%\caption{\footnotesize (a) The number of violations and costs with different $K$. (b) Costs penalized by $\lambda$ with different $K$, with each line associated with a different $\lambda$.}
1063%\label{figure4-2}
1064%\end{figure}
1065%
1066%
1067%
1068%
1069%\iffalse
1070%\begin{table}[h*]\footnotesize%\small%
1071%\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.85$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
1072%\begin{center}
1073%\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
1074% $\mathcal{T}_R$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
1075% \hline
1076% \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.834 & 0.372 \\
1077% && C-DRO-Model & (1,1,3) & &0.997 & 0.053 \\
1078% \cline{2-7}
1079% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.724 & 0.447 \\
1080% && C-DRO-Model & (1,1,3) & &0.982 & 0.132 \\
1081% \cline{2-7}
1082% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.673 & 0.469 \\
1083% && C-DRO-Model & (1,1,3) & &0.971 & 0.168 \\
1084%\hline
1085% \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.781 & 0.414 \\
1086% && C-DRO-Model & (1,1,4) & &0.999 & 0.028 \\
1087% \cline{2-7}
1088% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.690 & 0.462 \\
1089% && C-DRO-Model & (1,1,4) & &0.991 & 0.095 \\
1090% \cline{2-7}
1091% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.637 & 0.481 \\
1092% && C-DRO-Model & (1,1,4) & &0.989 & 0.105 \\
1093%\hline
1094% \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.716 & 0.451 \\
1095% && C-DRO-Model & (1,1,7) & &1.000 & 0.000 \\
1096% \cline{2-7}
1097% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.640 & 0.480 \\
1098% && C-DRO-Model & (1,1,7) & &0.998 & 0.040 \\
1099% \cline{2-7}
1100% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.602 & 0.490 \\
1101% && C-DRO-Model & (1,1,7) & &0.999 & 0.035 \\
1102%\hline
1103%\end{tabular}
1104%\end{center}
1105%\end{table}
1106%
1107%\begin{table}[h*]\footnotesize%\small%
1108%\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.90$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
1109%\begin{center}
1110%\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
1111% $\mathcal{T}_R$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
1112% \hline
1113% \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.618 & 0.486 \\
1114% && C-DRO-Model & (1,1,3) & &0.998 & 0.047 \\
1115% \cline{2-7}
1116% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.576 & 0.494 \\
1117% && C-DRO-Model & (1,1,3) & &0.989 & 0.103 \\
1118% \cline{2-7}
1119% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.560 & 0.496 \\
1120% && C-DRO-Model & (1,1,3) & &0.988 & 0.111 \\
1121%\hline
1122% \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.528 & 0.499 \\
1123% && C-DRO-Model & (1,1,4) & &0.999 & 0.037 \\
1124% \cline{2-7}
1125% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.538 & 0.499 \\
1126% && C-DRO-Model & (1,1,4) & &0.996 & 0.063 \\
1127% \cline{2-7}
1128% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.530 & 0.499 \\
1129% && C-DRO-Model & (1,1,4) & &0.994 & 0.080 \\
1130%\hline
1131% \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.458 & 0.498 \\
1132% && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
1133% \cline{2-7}
1134% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.481 & 0.500 \\
1135% && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
1136% \cline{2-7}
1137% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.496 & 0.500 \\
1138% && C-DRO-Model & (1,1,7) & &0.995 & 0.069 \\
1139%\hline
1140%\end{tabular}
1141%\end{center}
1142%\end{table}
1143%
1144%\begin{table}[h*]\footnotesize%\small%
1145%\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.90$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
1146%\begin{center}
1147%\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
1148% $\mathcal{T}_R$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
1149% \hline
1150% \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.143 & 0.350 \\
1151% && C-DRO-Model & (1,1,3) & &0.653 & 0.476 \\
1152% \cline{2-7}
1153% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.576 & 0.494 \\
1154% && C-DRO-Model & (1,1,3) & &0.989 & 0.103 \\
1155% \cline{2-7}
1156% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.560 & 0.496 \\
1157% && C-DRO-Model & (1,1,3) & &0.988 & 0.111 \\
1158%\hline
1159% \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.528 & 0.499 \\
1160% && C-DRO-Model & (1,1,4) & &0.999 & 0.037 \\
1161% \cline{2-7}
1162% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.538 & 0.499 \\
1163% && C-DRO-Model & (1,1,4) & &0.996 & 0.063 \\
1164% \cline{2-7}
1165% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.530 & 0.499 \\
1166% && C-DRO-Model & (1,1,4) & &0.994 & 0.080 \\
1167%\hline
1168% \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.458 & 0.498 \\
1169% && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
1170% \cline{2-7}
1171% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.481 & 0.500 \\
1172% && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
1173% \cline{2-7}
1174% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.496 & 0.500 \\
1175% && C-DRO-Model & (1,1,7) & &0.995 & 0.069 \\
1176%\hline
1177%\end{tabular}
1178%\end{center}
1179%\end{table}
1180%
1181%\begin{table}[h*]\footnotesize%\small%
1182%\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.90$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
1183%\begin{center}
1184%\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
1185% $\mathcal{T}_R$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
1186% \hline
1187% \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.348 & 0.476 \\
1188% && C-DRO-Model & (1,1,3) & &0.930 & 0.256 \\
1189% \cline{2-7}
1190% & \multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.427 & 0.495 \\
1191% && C-DRO-Model & (1,1,3) & &0.948 & 0.221 \\
1192% \cline{2-7}
1193% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.445 & 0.497 \\
1194% && C-DRO-Model & (1,1,3) & &0.957 & 0.203 \\
1195%\hline
1196% \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.278 & 0.448 \\
1197% && C-DRO-Model & (1,1,4) & &0.933 & 0.250 \\
1198% \cline{2-7}
1199% &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.381 & 0.486 \\
1200% && C-DRO-Model & (1,1,4) & &0.974 & 0.159 \\
1201% \cline{2-7}
1202% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.409 & 0.492 \\
1203% && C-DRO-Model & (1,1,4) & &0.979 & 0.143 \\
1204%\hline
1205% \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.214 & 0.410 \\
1206% && C-DRO-Model & (1,1,7) & &0.909 & 0.287 \\
1207% \cline{2-7}
1208% & \multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.345 & 0.475 \\
1209% && C-DRO-Model & (1,1,7) & &0.973 & 0.163 \\
1210% \cline{2-7}
1211% &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.383 & 0.486 \\
1212% && C-DRO-Model & (1,1,7) & &0.985 & 0.122 \\
1213%\hline
1214%\end{tabular}
1215%\end{center}
1216%\end{table}
1217%\fi
1218%
1219%\iffalse
1220%From the experiment result, we can observe that despite a smaller ambiguity set, our design ($x^{(2)}$) can achieve robustness level that is comparable to the design without clustering $x^{(1)}$, and are far better than the baseline probabilistic model ($x^{(3)}$).
1221%\fi
1222%
1223%\subsection{Comparison with a baseline probabilistic model}
1224%
1225%\begin{figure}[H]
1226%\centering
1227%\includegraphics[scale=0.8]{KVARIATION55.pdf}
1228%\caption{\footnotesize The series-parallel system we are studying in this section, consists of a single type of component, with active parallel strategy only. This simplicity is due to the limitation of baseline probabilistic model.}
1229%\label{figure1}
1230%\end{figure}
1231%
1232%To illustrate the performance of our robust reliability model, we compare the design ($x^{(1)}$) obtained from the proposed robust redundancy optimization model with the design ($x^{(2)}$) obtained from a probabilistic redundancy optimization model. We choose $K = 5$, corresponding to $\lambda = 0.8$ in the previous subsection. As mentioned in the Introduction and Literature Review, when the situation involves multiple types ({\it i.e.,} $|\mathbf{M}_i|>1$), or both the cold-standby and active parallel redundant subsystems are considered, the probabilistic model generally becomes intractable. Therefore, for a fair comparison, we consider a series-parallel system with $|\mathbf{N}| = 3$ and $|\mathbf{M}_i|=1, \forall i \in [3]$, which preserves a linear MIP formulation for the probabilistic model. For a coherent exposition of the experimental study, we place the details of the probabilistic redundancy model as well as its MIP transformation in the Electronic Companion.
1233%
1234%\iffalse
1235%In particular, we first randomly generate lifetime samples (size=2500) and then compute the probability levels $\P[\tilde{z}_{i}\le \mathcal{T}_R ], \forall i \in [3]$ and the parameters $(\hnu, \underline{\hmu}, \overline{\hmu}, \bm{\hsigma}, \p)$ from the generated lifetime samples for parameter inputs of the probabilistic and robust models, respectively, where $R_0=0.95$ and $\mathcal{T}_R=7.5$. We obtain the designs by solving the respective redundancy models. \fi We perform out-of-sample experiments in a similar way to the out-of-sample test performed in Section 5.2. First, we keep the lifetime mean the same ($\Delta_m = 0$), but increase the out-of-sample standard deviation ($\Delta_s = 500\%, 1000\%, 1500\%$, respectively). This corresponds to the first row of the figure. Then, we shrink the lifetime mean ($\Delta_m = 10\%, 12.5\%, 15\%$) at the same time as $\Delta_s$ enlarges. This corresponds to the second row of the figure. The out-of-sample system lifetimes are compared and plotted in the figure, and the comparison of out-of-sample reliability levels is provided in the table.
1236%\begin{table}[H]\footnotesize%\small%
1237%\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.95$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
1238%\begin{center}
1239%\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
1240% $\mathcal{T}_R$ & $(\Delta_{m}, \Delta_{s})$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
1241% \hline
1242% \multirow{12}{*}{7.5} & \multirow{2}{*}{(0, 500\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.952 & 0.214 \\
1243% && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
1244% \cline{2-7}
1245% &\multirow{2}{*}{(0, 1000\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.886 & 0.318 \\
1246% && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
1247% \cline{2-7}
1248% &\multirow{2}{*}{(0, 1500\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.800 & 0.400 \\
1249% && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
1250% \cline{2-7}
1251% & \multirow{2}{*}{(10\%, 500\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.408 & 0.492 \\
1252% && C-DRO-Model & (1,2,8) & &0.995 & 0.069 \\
1253% \cline{2-7}
1254% &\multirow{2}{*}{(12.5\%, 1000\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.328 & 0.469 \\
1255% && C-DRO-Model & (1,2,8) & &0.970 & 0.172 \\
1256% \cline{2-7}
1257% &\multirow{2}{*}{(15\%, 1500\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.291 & 0.454 \\
1258% && C-DRO-Model & (1,2,8) & &0.959 & 0.198 \\
1259%\hline
1260%\end{tabular}
1261%\end{center}
1262%\end{table}
1263%
1264%\begin{figure}[H]
1265%\centering
1266%\includegraphics[width=\columnwidth]{Out_of_sample_def.png}
1267%\caption{\footnotesize The out-of-sample system lifetime scenarios of the robust model with clustering and the probabilistic model under different $\Delta_{m}$ and $\Delta_{s}$. Figures (a), (b), and (c) represent the scenario in which the mean of the out-of-sample data is kept the same, with different values of $\Delta_{s}$ that are much larger than in the training set; figures (d), (e), and (f) represent the scenario in which the mean of the out-of-sample data shrinks ($\Delta_{m} > 0$), in addition to much larger $\Delta_{s}$. The vertical beam represents $\mathcal{T}_R$. The fraction of the lifetime histogram on the right side of the beam represents the out-of-sample reliability level.}
1268%\label{figure_out_of_sample_test_def}
1269%\end{figure}
1270%
1271%We can observe that when the mean lifetime is kept the same, the robust model produces very robust designs, while the baseline probabilistic model produces a design that works fine for moderate $\Delta_s$, but becomes unsatisfactory under larger $\Delta_s$; when the lifetime mean shrinks, however, the out-of-sample reliability of the design from the baseline model deteriorates drastically, while the design from the robust model can still keep the reliability level intact. This illustrates that our model outperforms the baseline model in that it is significantly more robust, especially under extreme circumstances. In addition, as mentioned before, our model is tractable for multi-type mixed strategy systems, while the probabilistic model becomes intractable. Thus our model is superior in both robustness and computability.
1272%
1273%
1274%\subsection{Value of side information}
1275%In this subsection we experiment on clustering according to side information. The system we are studying is the same as the one in Section 5.2. We choose $K = 5$, corresponding to $\lambda = 0.8$ in the cross validation section. When generating samples from distributions, we also obtain the side information of which of the 5 distributions the sample is drawn from. We then cluster the data set by $K = 5$, based solely on the side information, and compute parameters $(\hnu, [\underline{\hmu}, \overline{\hmu}], \bm{\hsigma}, \p)$ of the model from it. We obtain design ($x^{(2)}$) from the model, and compare it with design ($x^{(1)}$) obtained from the model in which $K$ is also 5, but is directly clustered based on lifetime information instead of the side information.
1276%
1277%\begin{table}[H]\label{d-table}\scriptsize%\footnotesize%\small%
1278%\caption{ \footnotesize The design table for the $K = 5$ model with and without side information (S.I). AP, CS-I and CS-II represent the active parallel type, cold-standby type 1 and cold-standby type 2, respectively.}
1279%\begin{center}
1280%\begin{tabular}{|c|c| c|| ccccc || ccccc || ccccc |}\hline
1281%\multirow{2}{*}{S.I} & \multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{5}{c||}{$R_0=0.95$,~$\mathcal{T}_R$ (yrs) }& \multicolumn{5}{c||}{$R_0=0.97$,~$\mathcal{T}_R$ (yrs)}& \multicolumn{5}{c|}{$R_0=0.99$,~$\mathcal{T}_R$ (yrs)} \\
1282% \cline{4-18}
1283% &&&28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 \\
1284% \cline{1-18}
1285% && AP & 1 & 1 &1 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 & 1 \\ &
1286% 1 & CS-I & 1 & 1 &1 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 & 2 & 2 & 2 \\
1287% &&CS-II & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
1288% \cline{2-18}
1289% && AP & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\ With &
1290% 2 & CS-I & 1 & 0 &0 & 0 & 0 & 1 & 0 &0 & 0 & 3 & 1 & 0 & 0 & 0 & 3 \\ S.I &
1291% & CS-II & 1 & 2 &2 & 2 & 2 & 1 & 2 &2 & 2 & 0 & 1 & 2 & 2 & 2 & 0 \\
1292% \cline{2-18}
1293% && AP & 1 & 0 &1 & 1 & 0 & 1 & 1 &0 & 1 & 1 & 0 & 0 & 0 & 1 & 1 \\ &
1294% 3 & CS-I & 0 & 1 &3 & 0 & 1 & 0 & 0 &1 & 3 & 2 & 1 & 1 & 1 & 3 & 2 \\
1295% && CS-II & 3 & 3 &1 & 3 & 3 & 3 & 3 &3 & 1 & 2 & 3 & 3 & 3 & 1 & 2 \\
1296%\hline
1297%\multicolumn{3}{|c||}{Design cost (k\$)} & 38.5& 39.5 &40&\!\! 44 &\!\! 44.5 & 43.5 & 44 &44.5& 45 & 46 & 54 & \!\! 54.5\!\! &54.5\!\!& 55\!\! &56 \\
1298%\hline
1299%\multicolumn{3}{|c||}{Cost saved (k\$)} & 1 & 4.5 &4&\!\! 0 &\!\! 1 & 0.5 & 1 &0.5& 1 & 1.5 & 0 & \!\! 0.5\!\! &0.5\!\!& 0\!\! &0 \\
1300%\cline{1-18}
1301% \cline{2-18}
1302% && AP & 1 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 & 1 \\ &
1303% 1 & CS-I & 1 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 & 2 & 2 & 2 \\
1304% && CS-II & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
1305% \cline{2-18}
1306% && AP & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\ Without &
1307% 2 & CS-I & 1 & 2 &0 & 0 & 3 & 1 & 0 &0 & 2 & 2 & 1 & 0 & 0 & 0 & 3 \\ S.I &
1308% & CS-II & 1 & 0 &2 & 2 & 0 & 1 & 2 &2 & 1 & 1 & 1 & 2 & 2 & 2 & 0 \\
1309% \cline{2-18}
1310% && AP & 1 & 1 &1 & 1 & 1 & 0 & 1 &1 & 1 & 0 & 0 & 1 & 1 & 1 & 1 \\ &
1311% 3 & CS-I & 3 & 3 &0 & 0 & 3 & 1 & 3 &3 & 3 & 2 & 1 & 3 & 3 & 3 & 2 \\
1312% && CS-II& 1 & 1 &3 & 3 & 1 & 3 & 1 &1 & 1 & 3 & 3 & 1 & 1 & 1 & 2 \\
1313%\hline
1314%\multicolumn{3}{|c||}{Design cost (k\$)} & 39.5& 44 &44&\!\! 44 &\!\! 45.5 & 44 & 45 &45& 46 & 47.5 & 54 & \!\! 55\!\! &55\!\!& 55\!\! &56 \\
1315%\hline
1316%\end{tabular}
1317%\end{center}
1318%\end{table}
1319%
1320%The result shows that when side information is incorporated, we can achieve a design with much lower cost. We then choose $\mathcal{T}_R = 28.5$, which corresponds to the largest cost saving, and perform out-of-sample tests, in a similar way to the previous experiments. We can observe that even with this significant cost saving, the design obtained by clustering on side information still performs well enough under mean shrinkage and large standard deviation.
1321%
1322%\begin{table}[htp]\footnotesize%\small%
1323%\caption{\label{tab-compare3} \footnotesize The out-of-sample result of designs obtained at $\mathcal{T}_R = 28.5$. $R_0$ is set to $0.9$. In the design columns, the 3 columns correspond to different subsystems. The three numbers in each column indicate the number of redundancies used that are active parallel, cold-standby type I, or cold-standby type II, respectively. }
1324%\begin{center}
1325%\begin{tabular}{|c||c|c|| c|c|c| c | c |c |}\hline
1326%\multirow{2}{*}{$\mathcal{T}_R$} & \multirow{2}{*}{($\Delta_{m}, \Delta_{s}$)} &\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Design} & \multirow{2}{*}{cost} & \multirow{2}{*}{Mean of out-of-sample reliability level} & \multirow{2}{*}{StD} \\
1327%\cline{4-6}
1328%&&& 1 & 2 & 3 &&&\\
1329%\hline
1330%\multirow{6}{*}{28.5} & \multirow{2}{*}{(5\%, 800\%)} & With S.I & (1,1,0) & (1,0,2) & (0,1,3)& 39.5 &0.985 & 0.122 \\
1331% & & Without S.I & (0,2,0) & (1,2,0) & (1,3,1)& 44 &1.000 & 0.000 \\
1332% \cline{2-9}
1333% &\multirow{2}{*}{(5\%, 1000\%)} & With S.I & (1,1,0) & (1,0,2) & (0,1,3)& 39.5 & 0.970 & 0.172 \\
1334% & & Without S.I & (0,2,0) & (1,2,0) & (1,3,1)& 44 &0.999 & 0.028 \\
1335% \cline{2-9}
1336% &\multirow{2}{*}{(5\%, 1200\%)} & With S.I & (1,1,0) & (1,0,2) &(0,1,3)& 39.5 & 0.968 & 0.175 \\
1337% & & Without S.I & (0,2,0) & (1,2,0) & (1,3,1)& 44 &0.996 & 0.060 \\
1338%\hline
1339%\end{tabular}
1340%\end{center}
1341%\end{table}
1342%
1343%\begin{figure}[H]
1344%\centering
1345%\includegraphics[width=\columnwidth]{Out_of_sample_side.png}
1346%\caption{\footnotesize The out-of-sample test result of designs obtained at $\mathcal{T}_R = 28.5$, with and without side information (S.I). The vertical beam represents $\mathcal{T}_R$. The fraction of the lifetime histogram on the right side of beam represents the out-of-sample reliability level.}
1347%\label{figure4}
1348%\end{figure}
1349%
1350%
1351%
1352%\iffalse
1353%\begin{table}[H]\footnotesize%\small%
1354%\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.95$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
1355%\begin{center}
1356%\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
1357% $\mathcal{T}_R$ & $\Delta_{m}, \Delta_{s}$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
1358% \hline
1359% \multirow{12}{*}{7.5} & \multirow{2}{*}{1, 5}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.952 & 0.214 \\
1360% && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
1361% \cline{2-7}
1362% &\multirow{2}{*}{1, 10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.886 & 0.318 \\
1363% && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
1364% \cline{2-7}
1365% &\multirow{2}{*}{1, 20}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.800 & 0.400 \\
1366% && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
1367%\hline
1368%\end{tabular}
1369%\end{center}
1370%\end{table}
1371%
1372%\begin{table}[!htbp]\label{d-table}\scriptsize%\footnotesize%\small%
1373%\caption{ \footnotesize The design table for $K = 10$ model with side information}
1374%\begin{center}
1375%\begin{tabular}{|c| c|| ccccc || ccccc || ccccc |}\hline
1376%\multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{5}{c||}{$R_0=0.95$,~$\mathcal{T}_R$ (yrs) }& \multicolumn{5}{c||}{$R_0=0.97$,~$\mathcal{T}_R$ (yrs)}& \multicolumn{5}{c|}{$R_0=0.99$,~$\mathcal{T}_R$ (yrs)} \\
1377% \cline{3-17}
1378% &&28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 \\
1379% \hline
1380% & 1 & 1 & 1 &1 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 & 1 \\
1381% 1 & 2 & 1 & 1 &1 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 & 2 & 2 & 2 \\
1382% & 3 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
1383% \hline
1384% & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
1385% 2 & 2 & 1 & 0 &0 & 0 & 0 & 1 & 0 &0 & 0 & 3 & 1 & 0 & 0 & 0 & 3 \\
1386% & 3 & 1 & 2 &2 & 2 & 2 & 1 & 2 &2 & 2 & 0 & 1 & 2 & 2 & 2 & 0 \\
1387% \hline
1388% & 1 & 1 & 0 &1 & 1 & 0 & 1 & 1 &0 & 1 & 1 & 0 & 0 & 0 & 1 & 1 \\
1389% 3 & 2 & 0 & 1 &3 & 0 & 1 & 0 & 0 &1 & 3 & 2 & 1 & 1 & 1 & 3 & 2 \\
1390% & 3 & 3 & 3 &1 & 3 & 3 & 3 & 3 &3 & 1 & 2 & 3 & 3 & 3 & 1 & 2 \\
1391%\hline
1392%\multicolumn{2}{|c||}{Design cost (k\$)} & 38.5& 39.5 &40&\!\! 44 &\!\! 44.5 & 43.5 & 44 &44.5& 45 & 46 & 54 & \!\! 54.5\!\! &54.5\!\!& 55\!\! &56\\
1393%\hline
1394%\end{tabular}
1395%\end{center}
1396%\end{table}
1397%
1398%
1399%\begin{table}[!htbp]\label{d-table}\scriptsize%\footnotesize%\small%
1400%\caption{ \footnotesize The design table for $K = 10$ model without side information}
1401%\begin{center}
1402%\begin{tabular}{|c| c|| ccccc || ccccc || ccccc |}\hline
1403%\multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{5}{c||}{$R_0=0.95$,~$\mathcal{T}_R$ (yrs) }& \multicolumn{5}{c||}{$R_0=0.97$,~$\mathcal{T}_R$ (yrs)}& \multicolumn{5}{c|}{$R_0=0.99$,~$\mathcal{T}_R$ (yrs)} \\
1404% \cline{3-17}
1405% &&28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 \\
1406% \hline
1407% & 1 & 1 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 & 1 \\
1408% 1 & 2 & 1 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 & 2 & 2 & 2 \\
1409% & 3 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
1410% \hline
1411% & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
1412% 2 & 2 & 1 & 2 &0 & 0 & 3 & 1 & 0 &0 & 2 & 2 & 1 & 0 & 0 & 0 & 3 \\
1413% & 3 & 1 & 0 &2 & 2 & 0 & 1 & 2 &2 & 1 & 1 & 1 & 2 & 2 & 2 & 0 \\
1414% \hline
1415% & 1 & 1 & 1 &1 & 1 & 1 & 0 & 1 &1 & 1 & 0 & 0 & 1 & 1 & 1 & 1 \\
1416% 3 & 2 & 3 & 3 &0 & 0 & 3 & 1 & 3 &3 & 3 & 2 & 1 & 3 & 3 & 3 & 2 \\
1417% & 3 & 1 & 1 &3 & 3 & 1 & 3 & 1 &1 & 1 & 3 & 3 & 1 & 1 & 1 & 2 \\
1418%\hline
1419%\multicolumn{2}{|c||}{Design cost (k\$)} & 39.5& 44 &44&\!\! 44 &\!\! 45.5 & 44 & 45 &45& 46 & 47.5 & 54 & \!\! 55\!\! &55\!\!& 55\!\! &56\\
1420%\hline
1421%\end{tabular}
1422%\end{center}
1423%\end{table}
1424%\fi
1425%
1426%\iffalse
1427%Data creating process finised
1428%Academic license - for non-commercial use only
1429%cost is 38.5003857478587 , \mathcal{T}_R is 26, R_0 is 0.95
1430%
1431%(((1.0, -0.0, -0.0), (1.0, -0.0, -0.0), (7.563683503866358e-06, 7.563683503866358e-06, 7.563683503866358e-06)), ((-0.0, -0.0, 1.0), (-0.0, 1.0, -0.0), (1.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, -0.0, 1.0), (1.0, 1.0, -0.0)))
1432%cost is 43.0 , \mathcal{T}_R is 26, R_0 is 0.97
1433%
1434%(((-0.0, -0.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, 1.0, -0.0), (1.0, -0.0, 1.0)))
1435%cost is 53.0 , \mathcal{T}_R is 26, R_0 is 0.99
1436%
1437%(((-0.0, 1.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, 1.0, -0.0), (-0.0, 1.0, 1.0)))
1438%cost is 44.0 , \mathcal{T}_R is 27, R_0 is 0.95
1439%
1440%(((-0.0, -0.0, -0.0), (1.0, -0.0, 1.0), (0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (1.0, -0.0, -0.0), (1.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, -0.0, 1.0), (1.0, 1.0, 1.0)))
1441%cost is 44.0 , \mathcal{T}_R is 27, R_0 is 0.97
1442%
1443%(((-0.0, -0.0, -0.0), (1.0, 1.0, -0.0), (0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, 1.0, -0.0), (0.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, 1.0, -0.0), (1.0, 1.0, 1.0)))
1444%cost is 54.0 , \mathcal{T}_R is 27, R_0 is 0.99
1445%
1446%(((-0.0, 1.0, -0.0), (-0.0, 1.0, 1.0), (0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (1.0, -0.0, -0.0), (-0.0, -0.0, 1.0)), ((-0.0, -0.0, -0.0), (-0.0, 1.0, -0.0), (1.0, 1.0, 1.0)))
1447%cost is 44.0 , \mathcal{T}_R is 28, R_0 is 0.95
1448%
1449%(((-0.0, -0.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, -0.0, 1.0), (1.0, 1.0, 1.0)))
1450%cost is 45.00017123897305 , \mathcal{T}_R is 28, R_0 is 0.97
1451%
1452%(((5.707965768311441e-06, 5.707965768311441e-06, 5.707965768311441e-06), (1.0, 1.0, -0.0), (0.0, 0.0, -0.0)), ((0.0, 1.0, -0.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 0.0), (-0.0, 1.0, -0.0), (1.0, 1.0, 1.0)))
1453%cost is 54.0 , \mathcal{T}_R is 28, R_0 is 0.99
1454%
1455%(((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, -0.0, 1.0), (1.0, 1.0, 1.0)))
1456%cost is 45.0 , \mathcal{T}_R is 29, R_0 is 0.95
1457%
1458%(((-0.0, -0.0, -0.0), (1.0, -0.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, -0.0, -0.0), (1.0, -0.0, 1.0)), ((-0.0, 1.0, -0.0), (1.0, 1.0, 1.0), (-0.0, -0.0, 1.0)))
1459%cost is 54.5 , \mathcal{T}_R is 29, R_0 is 0.97
1460%
1461%(((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((1.0, -0.0, -0.0), (-0.0, -0.0, -0.0), (1.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, -0.0, -0.0), (1.0, 1.0, 1.0)))
1462%cost is 55.0 , \mathcal{T}_R is 29, R_0 is 0.99
1463%
1464%(((-0.0, -0.0, 1.0), (1.0, 1.0, -0.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, -0.0, -0.0), (1.0, -0.0, 1.0)), ((-0.0, 1.0, -0.0), (1.0, 1.0, 1.0), (-0.0, 1.0, -0.0)))
1465%cost is 55.0 , \mathcal{T}_R is 30, R_0 is 0.95
1466%
1467%(((-0.0, -0.0, 1.0), (1.0, 1.0, -0.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, -0.0, -0.0), (1.0, 1.0, 1.0)))
1468%cost is 55.5 , \mathcal{T}_R is 30, R_0 is 0.97
1469%
1470%(((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, 1.0, -0.0)))
1471%cost is 56.0 , \mathcal{T}_R is 30, R_0 is 0.99
1472%
1473%(((1.0, -0.0, -0.0), (1.0, 1.0, -0.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((1.0, 0.0, -0.0), (1.0, 1.0, -0.0), (-0.0, 1.0, 1.0)))
1474%\fi
1475%
1476%\subsection{Case study}
1477%
1478%
1479%\section{Conclusion}
1480%
1481%\begin{thebibliography}{}
1482%\bibitem{Aggarwal2012}Aggarwal, C. C., Y. Zhao, P. S. Yu. 2012. On Text Clustering with Side Information. {\em 2012 IEEE 28th International Conference on Data Engineering}, 894--904.
1483%
1484%\bibitem{Ardakan2014} Ardakan, M. A., A. Z. Hamadani. 2014. Reliability-redundancy allocation problem with cold-standby redundancy strategy. {\em Simulation Modelling Practice and Theory}~{\bf 42}:107--118.
1485%
1486%\bibitem{Bertsimas2011}Bertsimas, D., M. Sim. 2004. The price of robustness. {\em Operations Research}~{\bf 52}(1):35--53.
1487%
1488%\bibitem{Bhunia2010} Bhunia, A. K., L. Sahoo, D. Roy. 2010. Reliability stochastic optimization
1489%for a series system with interval component reliability via genetic
1490%algorithm. {\em Appl.Math. Computat.}~{\bf 216}~(3): 929--939, 2010.
1491%
1492%
1493%\bibitem{Chern1992}Chern, M.S. 1992. On the computational complexity of reliability redundancy allocation in a series system. {\em Operations research letters}~{\bf 11}~(5):309--315.
1494%
1495%\bibitem{Cheng2009}Cheng, Z., X. Wang, C. Tian, F. Wang. 2009. Mission reliability simulation of High-speed EMU service braking system. {\em Proceedings of the 8th International Conference on Reliability, Maintainability and Safety} (ICRMS 2009), 253--256.
1496%
1497%\bibitem{Coit1996}Coit, D. W., A.E. Smith. 1996. Solving the redundancy allocation problem using a combined neural network/genetic algorithm approach. {\em Computers \& Operations Research}~{\bf 23}~(6):515--526.
1498%
1499%\bibitem{Coit1998}Coit, D. W., A.E. Smith. 1998. Redundancy allocation to maximize a lower percentile of the system time-to-failure distribution. {\em IEEE Transactions on Reliability}~ {\bf 47}~(1):79--87.
1500%
1501%\bibitem{Coit2003}Coit, D. W. 2003. Maximization of system reliability with a choice of redundancy strategies. {\em IIE Transactions}~{\bf 35}~(6):535--543.
1502%
1503%\bibitem{Coit2004} Coit, D.W., T. Jin, N. Wattanapongsakorn. 2004. System optimization
1504%with component reliability estimation uncertainty: A multi-criteria approach. {\em IEEE Trans. Rel.}~ {\bf 53}~(3) : 369--380, 2004.
1505%
1506%
1507%
1508%
1509%\bibitem{Elsayed2012}Elsayed, E A. 2012. {\em Reliability Engineering}. 2nd Edition. Wiley.
1510%
1511%
1512%\bibitem{Govindan2017}Govindan, K., A. Jafarian, M.E. Azbari, T.M. Choi. 2017. Optimal bi-objective redundancy allocation for systems reliability and risk management. {\em IEEE Transactions on Cybernetics}~{\bf 46}~(8):1735--1748.
1513%
1514%
1515%
1516%%\bibitem{Lam2012}Lam SW, T.S. Ng, and M. Sim. (2012). Multiple objectives satisficing under uncertainty. To appear in Operations Research, 2012.
1517%
1518%%\bibitem{Lin2011}Lin J, Muthuraman K, Lawley M (2011) Optimal and approximate algorithms for sequential clinical scheduling with no-shows. {\it IIE Transactions on Healthcare Systems Engineering} 1:20--36.
1519%
1520% %\bibitem{McCarthy2000} McCarthy K, McGee HM, O'Boyle CA. 2000. Outpatient clinic waiting times and non-attendance as indicators of quality. {\it Psychology, Health and Medicine} 5: 287--293.
1521%
1522%\bibitem{Grani2017} Hanasusanto, G. A., V. Roitch, D. Kuhn, W. Wiesemann. 2017. Ambiguous joint chance constraints under mean and dispersion information. {\em Operations Research}~{\bf 65}~(3):715--767.
1523%
1524%
1525%
1526%
1527%
1528%\bibitem{Elegbede2003}Elegbede, A.C., C. Chu, K.H. Adjallah, F. Yalaoui. 2003. Reliability allocation through cost minimization. {\em IEEE Transactions on reliability}~{\bf 52}~(1):106--111.
1529%
1530%\bibitem{Feizollahi2012} Feizollahi, M.J., M. Modarres. 2012. The robust deviation redundancy allocation problem with interval component reliabilities. {\em IEEE Transactions on reliability}~{\bf 61}~(4):957--965.
1531%
1532%
1533%
1534%
1535%
1536%\bibitem{Feizollahi2014}Feizollahi, M.J., S. Ahmed, M. Modarres. 2014. The robust redundancy allocation problem in series-parallel systems with budgeted uncertainty. {\em IEEE Transactions on reliability}~{\bf 63}~(1):239--250.
1537%
1538%\bibitem{Feizollahi2015} Feizollahi, M.J., R. Soltan, H. Feyzollahi. 2015. The robust cold standby redundancy allocation in series-parallel systems with budgeted uncertainty. {\em IEEE Transactions on reliability}~{\bf 64}~(2):799--806.
1539%
1540%\bibitem{Hasegawa1999}Hasegawa, I., Uchida, S. 1999. Braking systems. {\em Japan Railway and Transport Review}~{\bf 20}:52--59.
1541%
1542%\bibitem{Kulturel-Konak2003}Kulturel-Konak, S., A. Smith., D., Coit. 2003. Efficiently Solving the Redundancy Allocation Problem Using Tabu Search. {\em IIE Transactions.}~{\bf 35}:515--526.
1543%
1544%\bibitem{Kuo2001}Kuo, W., V.R. Prasad, F.A. Tillman, C.L. Hwang. 2001. {\em Optimal Reliability Design: Fundamentals and Applications.} Cambridge university press. Cambridge.
1545%
1546%\bibitem{Kuo2007}Kuo, W., R. Wan. 2007. Recent advances in optimal reliability allocation. {\em IEEE Transactions on Systems, Man, and Cybernetics-Part A: Systems and Humans}, {\bf 37}~(2):143--156.
1547%
1548%\bibitem{Li2014}Li, Y.F., Y. Ding, E. Zio. 2014. Random fuzzy extension of the universal generating function approach for the
1549%reliability assessment of multi-state systems under aleatory and epistemic uncertainties. {\em IEEE Transactions on Reliability}~{\bf 63}~(1):13--25.
1550%
1551%\bibitem{Li2011}Li, C.Y., X. Chen, X.S. Yi, J.Y. Tao. 2011. Interval-valued reliability analysis of multi-state systems. {\em IEEE Transactions on Reliability}~{\bf 60}~(1):323--330.
1552%
1553%\bibitem{Li2008} Li, X., X. Hu. 2008. Some new stochastic comparisons for redundancy
1554%allocations in series and parallel systems.~{\em Statist. Probabil. Lett.}~{\bf 78}~(18): 3388--3394.
1555%
1556%\bibitem{Liang2004} Liang, Y., A. E. Smith. 2004 An ant colony optimization algorithm for the redundancy allocation problem (RAP). {\em IEEE Transactions on Reliability}~{\bf 53}~(3):417--423.
1557%
1558%\bibitem{Liao2014}Liao, L., F. K\"{o}ttig. 2014. Review of hybrid prognostics approaches for remaining useful life prediction of engineered systems, and an application to battery life prediction. {\em IEEE Transactions on Reliability}. {\bf 63}~(1):191--207.
1559%
1560%\bibitem{Liu2015}Liu, H., Y. Fu. 2015. Clustering with Partition Level Side Information. {\em 2015 IEEE International Conference on Data Mining}, 877--882.
1561%
1562%\bibitem{Military1992}Military, U.S. 1992. Reliability prediction of electronic equipment. MIL-HDBK-217F Notice 1.
1563%
1564%\bibitem{Marseguerra2005} Marseguerra, M., E. Zio, L. Podofillini, D. W. Coit. 2005. Optimal design
1565%of reliable network systems in presence of uncertainty. {\em IEEE
1566%Trans. Rel.}~{\bf 54}~(2): 243--253.
1567%
1568%
1569%\bibitem{Ng2014} Ng, S. Y., Y. Xing, K. L. Tsui. 2014. A naive Bayes model for robust remaining useful life prediction of lithium-ion battery. {\em Applied Energy}~{\bf 118}: 114--123.
1570%
1571%
1572%
1573%
1574%\bibitem{Prasad2001}Prasad, V. R., W. Kuo, K. O. Kim. 2001. Maximization of a percentile life of a series system through component redundancy allocation. {\em IIE Transactions}~{\bf 33}~(12):1071--1079.
1575%
1576%\bibitem{Pecht2008} Pecht, M. 2008. {\em Prognostics and Health Management of Electronics.} John Wiley \& Sons, Ltd.
1577%
1578%\bibitem{Shang2017} Shang, C., X. Huang, F. You. 2017. Data-driven robust optimization based on kernel learning. {\em Computers \& Chemical Engineering}~{\bf 106}:464--479.
1579%
1580%\bibitem{Shapiro2001}Shapiro~A.~2001.~On duality theory of conic linear problems. In {\em Semi-Infinite Programming}, chapter 7, 135--165, Kluwer Academic Publishers, 2001.
1581%
1582%\bibitem{Soltani2015}Soltani R., J. Safari, S.J. Sadjadi. 2015. Robust counterpart optimization for the redundancy allocation problem in series-parallel systems with component mixing under uncertainty. {\em Applied Mathematics \& Computation}~{\bf 271}~(C): 80--88.
1583%
1584%
1585%
1586%
1587%\bibitem{Sun2017} Sun, M. X., Y. F. Li, E. Zio. 2017. On the optimal redundancy allocation for multi-state series-parallel systems under epistemic uncertainty. {\em Reliability Engineering \& System Safety}. Accepted.
1588%
1589%\bibitem{Tang2014}Tang, S., C. Yu, X. Wang, X. Guo, X. Si. 2014. Remaining useful life prediction of lithium-ion batteries based on the wiener process with measurement error. {\em Energies}~{\bf 7}~(2):520--547.
1590%
1591%\bibitem{Tekiner-Mogulkoc2011}Tekiner-Mogulkoc, H., D. W. Coit. 2011. System reliability optimization
1592%considering uncertainty: Minimization of the coefficient of variation
1593%for series-parallel systems.~{\em IEEE Trans. Rel.}~{\bf 60}~(30): 667--674, 2011.
1594%
1595%
1596%\bibitem{Wang2012} Wang, Y., L. Li, S. Huang, Q. Chang. 2012. Reliability and covariance estimation of weighted k-out-of-n multi-state Systems. {\em European Journal of Operational Research}~{\bf 221}:~138--147.
1597%
1598%\bibitem{Wang2019} Wang, S., Y. Li, T. Jia. 2019. Distributionally Robust Design for Redundancy Allocation. {\em INFORMS Journal on Computing}, in press.
1599%
1600%\bibitem{Wisemann2014} Wiesemann, W., D. Kuhn, M. Sim. 2014. Distributionally robust convex optimization. {\em Operations Research}~ {\bf 62} ~(6)~ 1358--1376.
1601%
1602%\bibitem{Xie2017} Xie, W., Ahmed, S. 2017. Distributionally robust chance constrained optimal power flow with renewables: A conic reformulation. {\em IEEE Transactions on Power Systems.} Accepted.
1603%
1604%\bibitem{Xing2002} Xing, E. P., A. Y. Ng, M. I. Jordan, S. Russell. 2002. Distance metric learning, with application to clustering with side-information. {\em Proceedings of the 15th International Conference on Neural Information Processing Systems},~521--528.
1605%
1606%
1607%\bibitem{Yalaoui2005}Yalaoui, A., E. Chatelet, C. Chu. 2005. A new dynamic programming method for reliability redundancy allocation in a parallel-series system. {\em IEEE transactions on reliability}.~{\bf 54}~(2):254--261.
1608%
1609%
1610%\bibitem{Zaretalab2015}Zaretalab, A., V. Hajipour, M. Sharifi, M. R. Shahriari. 2015. A knowledge-based archive multi-objective simulated annealing algorithm to optimize series-parallel system with choice of redundancy strategies. {\em Computers \& Industrial Engineering}~{\bf 80}:33--44.
1611%
1612%\bibitem{Zhao2003} Zhao, R., B. Liu. 2003. Stochastic programming models for general redundancy-optimization problems.~{\em IEEE Trans. Rel.}~{\bf 52}~(2): 181--191, 2003.
1613%
1614%\bibitem{Zhao2011} Zhao, P., P.S. Chan, H.K.T. Ng. 2011. Optimal allocation of redundancies in series systems. {\em European Journal of Operational Research}~{\bf 220}~(3):673--683.
1615%
1616%\bibitem{Kuhn2013}Zymler, S., D. Kuhn, B. Rustem. 2013. Distributionally robust joint chance constraints with second-order moment information, {\em Mathematical Programming}~{\bf 137}~(1-2):167--198.
1617%\end{thebibliography}
1618%\newpage
1619%\ECSwitch
1620%\ECHead{Electronic Companion}\small
1621%\iffalse
1622%\section{Implementation Of K-means}
1623%The details of the K-means algorithm are as follows:
1624%
1625%1. Initialization: Randomly pick $K$ data points in $\mathcal{Z}$ to be the initial means $\hmu_1, \hmu_2, ... \hmu_K$.
1626%
1627%2. Assignment step: Assign each data point to one cluster, according to the rule
1628%\begin{equation}\label{k-means-assignment}
1629%\bm{z} \in \mathcal{Z}_{\argmin\limits_{k \in [K]}\|\bm{z} - \hmu_k\|^2}, \forall \bm{z} \in \mathcal{Z}
1630%\end{equation} and some tie-breaking rule.
1631%
1632%3. Update step: Compute the mean of each cluster:
1633%\begin{equation}\label{k-means-update}
1634%\hmu_k = \frac{1}{|\mathcal{Z}_k|}\sum\limits_{\bm{z} \in \mathcal{Z}_k}\bm{z}, \forall k \in [K]
1635%\end{equation}
1636%
1637%4. Check for convergence: if the new means computed in step 3 are the same as before, then the algorithm has converged and stops. Otherwise, go back to step 2.
1638%\fi
1639%
1640%\section{Discussion Of The Parallel Case}
1641%In this section, we discuss the case in which the cold-standby and active-parallel parts of a subsystem are activated together. First, consider the following worst-case probabilistic chance function in the problem, given system design $\x$:
1642%\begin{equation}\label{Prob-2}
1643%\displaystyle \inf\limits_{\P \in \mathbb{F}_K}\P\left[\min\limits_{i \in \mathbf{N}}\left(\sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} \tilde{z}_{ijt}x_{ijt} \bigvee \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}\tilde{z}_{ijt}x_{ijt}\right)> \mathcal{T}_R \right].
1644% \end{equation}
1645%For a better exposition of our approach, we denote by
1646%\begin{equation}\label{constraint-set-2}
1647%\mathcal{Z}_i(\x):=\left\{\bm{z} \in \mathbb{R}^H_+ ~\left|~ \sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} \tilde{z}_{ijt}x_{ijt} \bigvee \max_{j \in \mathbf{M}^{\rm a}_i} \max_{t \in \mathbf{T}_{ij}}\tilde{z}_{ijt}x_{ijt} > \mathcal{T}_R \right.\right \}, \forall i \in \mathbf{N},
1648%\end{equation}
1649%while the complementary set of $\mathcal{Z}_i$ for each $i \in \mathbf{N}$, denoted by $\overline{\mathcal{Z}}_i$, is
1650%\begin{equation}\label{set-LT-2}
1651%\overline{\mathcal{Z}}_i(\x)=\overline{\mathcal{Z}}^{\rm c}_i(\x) \cap \overline{\mathcal{Z}}^{\rm a}_i(\x),
1652%\end{equation}
1653%where
1654%$$
1655%\overline{\mathcal{Z}}^{\rm c}_i(\x):= \left\{\bm{z} \in \mathbb{R}^H_+ ~\left|~ \sum_{j\in \mathbf{M}^{\rm c}_i}\sum_{t\in \mathbf{T}_{ij}} z_{ijt} x_{ijt}\le \mathcal{T}_R \right.\right\}
1656%$$
1657%and
1658%$$
1659%\overline{\mathcal{Z}}^{\rm a}_i(\x):=\bigcap\limits_{j\in \mathbf{M}^{\rm a}_i}\bigcap\limits_{t\in \mathbf{T}_{ij}} \Big\{\bm{z} \in \mathbb{R}^H_+ ~\left|~z_{ijt} x_{ijt} \le \mathcal{T}_R \right.\Big\}.
1660%$$
1661%
1662%The following result establishes an equivalent regular robust optimization formulation for the above worst-case probabilistic chance function~\eqref{Prob-2}.
1663%
1664%\begin{lemma}\label{lem1-LT-2}
1665%Given system design $\x$, the worst-case probabilistic chance function (\ref{Prob-2}) equals the optimal value of the following optimization problem:
1666%\begin{equation}\label{P2-ambiguity-sup-dual1-2}
1667%\begin{array}{rcll}
1668%&\!\!\!\!\!\!\!\!\!\!\!\! \max\limits_{\bm{\alpha}, \bm{\beta}, \bm{\lambda}, \bm{\tau}} & 1-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij} + \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right) - \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} - \sum\limits_{k\in[K]}\tau_{k} \\[0.3 cm]
1669%&\!\!\!\!\!\!\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge{p_k}, & \!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\forall \bm{z} \in \mathcal{Z}_k\cap \overline{\mathcal{Z}}_i(\x), i \in \mathbf{N}, k \in [K]\\
1670%&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ij{t}}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge 0, & \!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\!\forall \bm{z} \in \mathcal{Z}_k, k\in [K]\\
1671%&& \halpha \le \mathbf{0}, \hbeta, \hlambda \ge \mathbf{0}, \bm{\tau} \in \mathbb{R}^K,
1672%\end{array}
1673%\end{equation}
1674%where
1675%\begin{equation}\label{equ:W-2}
1676%\mathcal{Z}_k = \big\{\bm{z} \in \mathcal{Z}\:\big|\: 2(\hmu_i - \hmu_k)^{\top}\bm{z} \leq \hmu_{i}^{\top}\hmu_{i} - \hmu_{k}^{\top}\hmu_{k}, \forall i \in [K] \big\}, \forall k \in [K].
1677%\end{equation}
1678%\end{lemma}
1679%\begin{proof}
1680%With the notation in (\ref{constraint-set-2}), the worst-case probabilistic chance function (\ref{Prob-2}) can be rewritten in terms of the probability of its complementary event:
1681%\begin{equation}\label{1minus-2}
1682%\inf\limits_{\P\in \mathbb{F}_{K}} \P\Big[\tilde{\bm{z} }\in \mathcal{Z}_i(\x),\forall i \in \mathbf{N} \Big]=1-\sup\limits_{\P \in \mathbb{F}_K } \P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big].
1683%\end{equation}
1684%
1685%Suppose the probability distribution of $\tilde{k}$ is given by
1686%$$
1687%\P\Big[\tilde{k}=k\Big]=p_k, \forall k \in [K].
1688%$$
1689%We now define $\P_k$ as the conditional distribution of $\bm{\tilde{z}}$ given $\tilde{k}=k$ for $k \in [K]$. Then we can decompose any distribution $\P \in \mathbb{F}_K$ using $\{\P_k, k\in [K]\}$ and rewrite the worst-case chance
1690%$$
1691%\sup\limits_{\P \in \mathbb{F}_K } \P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]
1692%$$
1693%using the law of total probability as the following formulation:
1694%\begin{eqnarray}\label{P2-ambiguity-sup-2}
1695%&&\sup\limits_{\P \in \mathbb{F}_K } \P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]\\[0.35 cm]
1696%&=&\sup\limits_{\P_k, \forall k\in[K]}\sum\limits_{k\in[K]}p_k\P_k\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]\\[0.25 cm]
1697%&=&\sup\limits_{\P_k}\sum\limits_{k\in[K]}\displaystyle \int_{\cup_{i\in \mathbf{N} }\left\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\right\}}p_k {\rm d}\P_k(\bm{\tilde{z}}) \\[0.25 cm]
1698%&{\rm s.t.} &\displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} \tilde{z}_{ijt} {\rm d}\P_k(\bm{\tilde{z}}) \ge \underline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k\in [K]\\ [0.35 cm]
1699%&& \displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} \tilde{z}_{ijt} {\rm d}\P_k(\bm{\tilde{z}}) \le \overline{\mu}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij},k\in [K]\\ [0.35 cm]
1700%&& \displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} \sum\limits_{t\in \mathbf{T}_{ij}}\left|\frac{\tilde{z}_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| {\rm d}\P_k(\bm{\tilde{z}}) \le \epsilon^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i,k\in [K]\\[0.35 cm]
1701%&& \displaystyle \int_{\bm{\tilde{z}} \in \mathcal{Z}_k} {\rm d}\P_k(\bm{\tilde{z}}) =1, \forall k\in[K],
1702%\end{eqnarray}
1703%where the support $\mathcal{Z}_k$ is given by (\ref{equ:W-2}). The Lagrange dual of the above moment problem (\ref{P2-ambiguity-sup-2}) has the following formulation (Wiesemann~et al.~2014):
1704%\begin{equation}\label{P1-ambiguity-sup-dual0-2}
1705%\begin{array}{rcl}
1706%&\!\!\!\!\!\!\!\!\!\!\!\! \min\limits_{\bm{\alpha}, \bm{\beta}, \bm{\lambda}, \bm{\tau}} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij}+ \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right)+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} + \sum\limits_{k\in[K]}\tau_{k} \\[0.3 cm]
1707%&\!\!\!\!\!\!\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k\\
1708%&& \ge p_k\mathbb{I}\Big({\cup_{i\in \mathbf{N} }\left\{ \bm{z} \in \overline{\mathcal{Z}}_i\right\}} \Big), \forall \bm{z} \in \mathcal{Z}_k, k \in [K]\\
1709%&& \halpha \le \mathbf{0}, \hbeta, \hlambda \ge \mathbf{0}, \bm{\tau} \in \mathbb{R}^K,
1710%\end{array}
1711%\end{equation}
1712%where $\mathbb{I}(\{\cdot\})$ is the indicator function with respect to set $\{\cdot\}$, and $(\halpha, \hbeta, \hlambda, \htau)$ are the dual variables associated with the constraints of the primal problem (\ref{P2-ambiguity-sup-2}).
1713%
1714%
1715%
1716%Furthermore, we show that strong duality holds. Since ${\mu^{k}_{ij}}$ is the expectation of $\bm{\tilde{z}}_{ijt}$, we can always find a Dirac probability distribution $\P^{\dag}_{\bm{\mu}}$ with $\underline{\hmu}<\hmu<\overline{\hmu}$, which is a relative interior point of the feasible set of problem (\ref{P2-ambiguity-sup-2}). Therefore, the Slater condition holds, and the optimal value of (\ref{P1-ambiguity-sup-dual0-2}) is equal to that of problem (\ref{P2-ambiguity-sup-2}).
1717%
1718%
1719%
1720%
1721%
1722%Next, expanding the indicator function $\mathbb{I}\left({\cup_{i\in \mathbf{N} }\left\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\right\}} \right)$ for different cases of $\bm{z}$, the above problem (\ref{P1-ambiguity-sup-dual0-2}) is also equivalent to the following formulation:
1723%\begin{equation}\label{P1-ambiguity-sup-dual00-2}
1724%\begin{array}{rcll}
1725%&\!\!\!\!\!\!\!\!\!\!\!\! \min\limits_{\bm{\alpha}, \bm{\beta}, \bm{\lambda}, \bm{\tau}} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij}+ \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right)+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} + \sum\limits_{k\in[K]}\tau_{k} \\[0.3 cm]
1726%&\!\!\!\!\!\!\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall \bm{z} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_i, i \in \mathbf{N}, k \in [K]\\
1727%&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge0, \forall \bm{z} \in \mathcal{Z}_k, k \in [K]\\\nonumber
1728%&& \halpha \le \mathbf{0}, \hbeta, \hlambda \ge \mathbf{0}, \bm{\tau} \in \mathbb{R}^K.
1729%\end{array}
1730%\end{equation}
1731%Finally, plugging this formulation into equation (\ref{1minus-2}), we arrive at the formulation of (\ref{P2-ambiguity-sup-dual1-2}), whose optimal objective value is exactly the worst-case value of the probabilistic chance function (\ref{Prob-2}) for all $\mathbb{P}_k \in \mathbb{F}_K$, given system design $\x$. The proof is completed.
1732%\end{proof}
1733%
1734%It is noted that the derived optimization problem (\ref{P2-ambiguity-sup-dual1-2}), in its current form, still belongs to the class of semi-infinite optimization problems, which are not directly computable. In the following, we show that by a duality argument the problem can be further transformed into a computationally tractable linear programming formulation.
1735%
1736%\begin{proposition}\label{P-proposition1b-2}
1737%Given a system design $\x$, the worst-case probabilistic chance function (\ref{Prob-2}) equals the optimal value of the following linear program (LP):
1738%\begin{eqnarray}
1739%&\!\!\!\!\!\! \max & 1-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij} + \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right) - \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} - \sum\limits_{k\in[K]}\tau_{k} \label{HP1-ambiguity-LP-FL-2} \\
1740% &\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \Big[\phi^{lk}_{ijt}\underline{z}_{ij}+\varphi^{lk}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} \right)} \nonumber\\
1741% &&+ \sum\limits_{n \in [K]}\psi^{lk}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2)\Big] + \sum\limits_{j\in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{lj}} q^{lk}_{jt}\mathcal{T}_R+ s^{lk}\mathcal{T}_R + \tau_k \geq p_k, \forall l \in \mathbf{N}, k \in [K] \label{HP1-ambiguity-LP-FL1-3}\\
1742% && \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \Big[\rho^{k}_{ijt}\underline{z}_{ij}+\varrho^{k}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\gamma^{k}_{ijt}-\theta^{k}_{ijt} \right)} \nonumber\\ && + \sum\limits_{n \in [K]}\varsigma^{k}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2)\Big] + \tau_k \geq0, \forall k \in [K]\label{HP1-ambiguity-LP-FL1-2-2}\\
1743% && q^{lk}_{jt}x_{l jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+{ \pi^{lk}_{ljt}-\varpi^{lk}_{ljt} } = \alpha^{k}_{ljt}+\beta^{k}_{ljt},\nonumber\\ && \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm a}_l, t \in \mathbf{T}_{l j}, k \in [K] \\
1744%&& s^{lk}x_{l jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} = \alpha^{k}_{ljt}+\beta^{k}_{ljt},\nonumber\\ && \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm c}_l, t \in \mathbf{T}_{l j}, k \in [K] \\
1745%&& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\psi^{lk}_{n}+ \phi^{lk}_{ijt}+\varphi^{lk}_{ijt}+\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt},\nonumber\\ && ~ \forall l \in \mathbf{N}, i \in \mathbf{N}\setminus\{l \}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K], \label{HP2-ambiguity-LP-FL2-2} \\
1746%&&{|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\pi^{lk}_{ijt}+\varpi^{lk}_{ijt}) =\lambda^{k}_{ij}, ~ \forall l \in \mathbf{N}, i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K] \\
1747%&& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\varsigma^{k}_{n}+ \rho^{k}_{ijt}+\varrho^{k}_{ijt}+\gamma^{k}_{ijt}-\theta^{k}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt},\nonumber\\ && ~ \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K] \\
1748%&& {|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\gamma^{k}_{ijt}+\theta^{k}_{ijt}) =\lambda^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}, k \in [K] \\
1749%%&& q_{l jk}\le y_{{l jk}}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
1750%%&& y_{{l jk}} \ge M x_{l jk}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
1751%%&& y_{{l jk}} \le q_{l jk }+(x_{l jk}-1)M, ~\forall l \in \mathbf{N}, j \in \mathcal{J}({l}), k \in \mathcal{N}(l,j)\\[0.3 cm]
1752%%&& \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\ge L_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i \\
1753%&& \halpha,\q, \s, \hpsi, \hvarphi, \hvarsigma, \hvarrho \le \mathbf{0}, \htau \in \mathbb{R}^K, \\
1754%&& \hbeta, \hlambda, \hphi, \hrho, \hpi,\hvarpi, \hgamma, \htheta \ge \mathbf{0}, \label{HP2-ambiguity-LP-FL-2}
1755%\end{eqnarray}
1756%where $\halpha, \hbeta, \hlambda, \htau, \q, \s, \hpsi, \hphi, \hvarphi, \hpi, \hvarpi, \hrho, \hvarrho, \hvarsigma, \hgamma, \htheta$ are auxiliary variables.
1757%\end{proposition}
1758%
1759%{\bf Proof.}
1760%First of all, for a given $l \in \mathbf{N}$ we deal with the semi-infinite constraints
1761%$$
1762%\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall \bm{z} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l, k \in [K]\\
1763%$$
1764%Recall that
1765%$$
1766%\overline{\mathcal{Z}}_l(\x)=\overline{\mathcal{Z}}^{\rm c}_l(\x) \cap \overline{\mathcal{Z}}^{\rm a}_l(\x)= \left\{\bm{z} \in \mathbb{R}^{H}_+ ~\left|~\begin{array}{l}
1767%\displaystyle \sum_{j\in \mathbf{M}^{\rm c}_{l}}\sum_{t\in \mathbf{T}_{lj}} z_{ljt} x_{ljt}\le \mathcal{T}_R \\
1768% z_{ljt} x_{ljt} \le \mathcal{T}_R, j\in \mathbf{M}^a_{l}, t \in \mathbf{T}_{lj}
1769% \end{array}
1770%\right.\right\},
1771%$$
1772%and
1773%$$
1774%\mathcal{Z}_k = \big\{\bm{z} \in \mathcal{Z}\:\big|\: 2(\hmu_i - \hmu_k)^{\top}\bm{z} \leq \hmu_{i}^{\top}\hmu_{i} - \hmu_{k}^{\top}\hmu_{k}, \forall i \in [K] \big\}, \forall k \in [K],
1775%$$
1776%where
1777%$$
1778%\mathcal{Z} := \big\{\bm{z} \in \mathbb{R}^{H}| z_{ijt} \in [\underline{z}_{ij},\overline{z}_{ij}], \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij} \big\}.
1779%$$
1780%
1781%First of all, we claim that for any $k \in [K]$
1782%\begin{equation}\label{Lifting-3}
1783%\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall \bm{z} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l
1784%\end{equation}
1785%is equivalent to
1786%\begin{equation}\label{Lifting-4}
1787%\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + u^{k}_{ijt}\lambda^{k}_{ij}\right] + \tau_k \ge p_k, \forall (\bm{z},\u) \in \mathcal{W}_k,
1788%\end{equation}
1789%where
1790%$$
1791%\mathcal{W}_k := \left\{(\bm{z}, \u) \middle| \: \bm{z} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l, \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| \leq u^{k}_{ijt}, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij} \right\}, \forall k \in [K].
1792%$$
1793%
1794%Assume \eqref{Lifting-3} holds, since $\hlambda \geq \mathbf{0}$,
1795%$$
1796%u^{k}_{ijt}\lambda^{k}_{ij} \geq \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}, (\bm{z}, \u) \in \mathcal{W}_k.
1797%$$
1798%Therefore \eqref{Lifting-4} holds.
1799%
1800%Assume \eqref{Lifting-4} holds, then for any $\hat{\bm{z}} \in \mathcal{Z}_k\cap\overline{\mathcal{Z}}_l$,
1801%$$
1802%\min\limits_{(\hat{\bm{z}}, \u) \in \mathcal{W}_k}u^{k}_{ijt} = \left|\frac{ \hat{z}_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t \in \mathbf{T}_{ij}.
1803%$$
1804%Since $\hlambda \geq \mathbf{0}$,
1805%$$
1806%\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[\hat{z}_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ \hat{z}_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k
1807%$$
1808%$$
1809%=\min\limits_{(\hat{\bm{z}}, \u) \in \mathcal{W}_k}\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[\hat{z}_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + u^{k}_{ijt}\lambda^{k}_{ij}\right] + \tau_k \geq p_k.
1810%$$
1811%Therefore \eqref{Lifting-3} holds.
1812%
1813%Therefore \eqref{Lifting-3} and \eqref{Lifting-4} are equivalent.
1814%
1815%Then, by introducing auxiliary variables $u^{k}_{ijt}, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}$, we can equivalently lift the above constraints into the following optimization-based formulation:
1816%\begin{equation}\label{H-system2}
1817%\left.\begin{array}{rcll}
1818% p_k-\tau_k\le & \min\limits_{\bm{z}, \u} & \displaystyle \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + u^{k}_{ijt} \lambda^{k}_{ij}\right] \\[0.3 cm]
1819%&{\rm s.t.} & z_{l jt}x_{l jt}\le \mathcal{T}_R, &\forall j\in \mathbf{M}^{\rm a}_{l}, t\in \mathbf{T}_{l j} \\[0.3 cm]
1820%&& \displaystyle \sum_{j\in \mathbf{M}^{\rm c}_{l}}\sum_{t\in \mathbf{T}_{lj}} z_{ljt} x_{ljt}\le \mathcal{T}_R, & \\[0.3 cm]
1821%&& {|\mathbf{T}_{ij} |\sigma^{k}_{ij}}u^{k}_{ijt}- { z_{ijt}} \ge { -\nu^{k}_{ij}}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
1822%&& {|\mathbf{T}_{ij} |\sigma^{k}_{ij}}u^{k}_{ijt}+ { z_{ijt}} \ge {\nu^{k}_{ij} }, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
1823%&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} 2(\mu^{n}_{ijt} - \mu^{k}_{ijt})z_{ijt}\\[0.3 cm]
1824%&& \leq \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} {(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2, & \forall n \in [K]\\[0.3 cm]
1825%&& \underline{z}_{ij} \leq z_{ijt} \leq \overline{z}_{ij} & \forall i \in \mathbf{N}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij}\\[0.3cm]
1826%&& u^{k}_{ijt} \in \mathbb{R}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}.
1827%\end{array}\right\}, \forall k \in [K]
1828%\end{equation}
1829%
1830%By the strong duality of linear programming, the above constraint is also equivalent to the following system: for all $k$ in $[K]$,
1831%\begin{equation*}
1832%\left\{ \begin{array}{rl}
1833%& p_k-\tau_k\le \sum\limits_{j\in \mathbf{M}^{\rm a}_{l}}\sum\limits_{t\in \mathbf{T}_{lj}} q^{lk}_{jt}\mathcal{T}_R+ s^{lk}\mathcal{T}_R\\[0.3 cm]
1834%& +\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \Big[\phi^{lk}_{ijt}\underline{z}_{ij}+\varphi^{lk}_{ijt}\overline{z}_{ij}+{\nu^{k}_{ij}\left(\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} \right)} + \sum\limits_{n \in [K]}\psi^{lk}_{n}({(\mu^{n}_{ijt})}^2 - {(\mu^{k}_{ijt})}^2)\Big] \\[0.3 cm]
1835%& q^{lk}_{jt}x_{l jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+{ \pi^{lk}_{ljt}-\varpi^{lk}_{ljt} } = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall j\in \mathbf{M}^{\rm a}_l, t \in \mathbf{T}_{l j} \\[0.3 cm]
1836%& s^{lk}x_{l jt} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} = \alpha^{k}_{ljt}+\beta^{k}_{ljt}, \forall j\in \mathbf{M}^{\rm c}_l, t \in \mathbf{T}_{l j} \\[0.3 cm]
1837%& \sum\limits_{n \in [K]}2(\mu^{n}_{ijt} - \mu^{k}_{ijt})\psi^{lk}_{n}+ \phi^{lk}_{ijt}+\varphi^{lk}_{ijt}+\pi^{lk}_{ijt}-\varpi^{lk}_{ijt} = \alpha^{k}_{ijt}+\beta^{k}_{ijt}, ~ \forall i \in \mathbf{N}\setminus\{l \}, j \in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
1838%&{|\mathbf{T}_{ij} |\sigma^{k}_{ij}}(\pi^{lk}_{ijt}+\varpi^{lk}_{ijt}) =\lambda^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij} \\[0.3 cm]
1839%& q^{lk}_{jt}\le 0, s^{lk}\le 0, \psi^{lk}_{n} \le 0, \phi^{lk}_{ijt} \geq 0, \varphi^{lk}_{ijt} \leq 0, \pi^{lk}_{ijt}\ge 0,\varpi^{lk}_{ijt}\ge 0, ~\forall n \in [K], i \in \mathbf{N}, j\in \mathbf{M}_i, t\in \mathbf{T}_{ij}.
1840%\end{array}
1841%\right\}
1842%\end{equation*}
1843%
1844%
1845%
1846%
1847%Likewise, the constraints
1848%$$
1849%\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right|\lambda^{k}_{ij}\right] + \tau_k \ge0, \forall \bm{z} \in \mathcal{Z}_k, k \in [K]\\
1850%$$
1851%can also be dualized similarly. Leveraging the derived formulation (\ref{P2-ambiguity-sup-dual1-2}) in Lemma~\ref{lem1-LT-2}, we arrive at the formulation of the linear program (\ref{HP1-ambiguity-LP-FL-2})--(\ref{HP2-ambiguity-LP-FL-2}). We are done.
1852%\blot
1853%
1854%
1855%Although the derived formulation (\ref{HP1-ambiguity-LP-FL-2})--(\ref{HP2-ambiguity-LP-FL-2}) can be considered a linear program for a given system design $\mathbf{x}$, in the original problem $\mathbf{x}$ is a decision variable, so bilinear terms $q^{lk}_{jt}x_{l jt}$ and $s^{lk}x_{l jt}$ appear in the overall formulation after the derived formulation is plugged back into the original problem. We can linearize these terms as follows.
1856%
1857%
1858%\begin{proposition}\label{proposition1b-2-2}
1859%The overall problem is equivalent to the following mixed integer linear program:
1860%\begin{eqnarray}
1861% & \min\limits_{\x} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \left[\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\right]c_{ij} \label{HP1-ambiguity-MILP-FL1-4-2}\\
1862% &{\rm s.t.} & 1-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}}\sum\limits_{k\in[K]} \left(\alpha^{k}_{ijt}\underline{\mu}^{k}_{ij} + \beta^{k}_{ijt}\overline{\mu}^{k}_{ij}\right)\nonumber\\
1863% && - \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in[K]}\epsilon^{k}_{ij}\lambda^{k}_{ij} - \sum\limits_{k\in[K]}\tau_{k}\ge R_{0} \\
1864% && L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{t\in \mathbf{T}_{ij}} x_{ijt}\le U_{i}, ~ \forall i \in \mathbf{N} \\
1865%%&& \sum\limits_{j\in \mathbf{M}_{l}} \sum\limits_{k\in \mathbf{K}_{l j}}q_{l jk}\mathcal{T}_R\nonumber\\
1866%% &&+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[ \phi^{l }^{k}_{ij}\underline{z}_{ij}+ \varphi^{\varsigma}^{k}_{ij}\overline{z}_{ij} + {\nu_{ij}\left(\pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} \right)} \right]+\tau \ge 1,~\forall {l \in \mathbf{N}} \\
1867%&& y^{\rm a}_{l jk} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+{ \pi^{lk}_{ljt}-\varpi^{lk}_{ljt} } = \alpha^{k}_{ljt}+\beta^{k}_{ljt},\nonumber\\ && \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm a}_l, t \in \mathbf{T}_{l j}, k \in [K] \label{con:32} \\
1868%&& y^{\rm c}_{l jk} +\sum\limits_{n \in [K]}2(\mu^{n}_{ljt} - \mu^{k}_{ljt})\psi^{lk}_{n}+ \phi^{lk}_{ljt}+\varphi^{lk}_{ljt}+\pi^{lk}_{ljt}-\varpi^{lk}_{ljt} = \alpha^{k}_{ljt}+\beta^{k}_{ljt},\nonumber\\ && \forall l \in \mathbf{N}, j\in \mathbf{M}^{\rm c}_l, t \in \mathbf{T}_{l j}, k \in [K] \\
1869% && (\ref{HP1-ambiguity-LP-FL1-3}-\ref{HP1-ambiguity-LP-FL1-2-2}); (\ref{HP2-ambiguity-LP-FL2-2})-(\ref{HP2-ambiguity-LP-FL-2})\\
1870%%&&\phi^{l }^{k}_{ij}+\varphi^{l }^{k}_{ij}+ { \pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} }= \alpha^{k}_{ij}+\beta^{k}_{ij}, ~ \forall {l \in \mathbf{N}}, i \in \mathbf{N}\setminus\{l \}, j \in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
1871%%&&{|\mathbf{K}_{ij} |\sigma_{ij}} (\pi^{l }^{k}_{ij}+\varpi^{l }^{k}_{ij}) =\lambda_{ij}, ~ \forall {l \in \mathbf{N}}, i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
1872%%&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[\left(\varsigma^{k}_{ij}\underline{z}_{ij}+ \vartheta^{k}_{ij}\overline{z}_{ij} \right) + {\nu_{ij}\left(\gamma^{k}_{ij}-\theta^{k}_{ij} \right)}\right]+\tau \ge 0\\
1873%%&& \varsigma^{k}_{ij}+ \vartheta^{k}_{ij} + { \gamma^{k}_{ij}-\theta^{k}_{ij} }= \alpha^{k}_{ij}+\beta^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
1874%%&& {|\mathbf{K}_{ij} |\sigma_{ij}}(\gamma^{k}_{ij}+\theta^{k}_{ij}) = \lambda_{{ij}}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
1875%&& q^{lk}_{jt}\le y^{{\rm a}lk}_{jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
1876%&& y^{{\rm a}lk}_{jt} \ge M x_{l jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
1877%&& y^{{\rm a}lk}_{jt} \le q^{lk}_{jt}+(x_{l jt}-1)M, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm a}_{l}, t \in \mathbf{T}_{l j}, k \in [K]\\
1878%&& s^{lk}\le y^{{\rm c}lk}_{jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
1879%&& y^{{\rm c}lk}_{jt} \ge M x_{l jt}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t\in \mathbf{T}_{l j}, k \in [K] \\
1880%&& y^{{\rm c}lk}_{jt} \le s^{lk}+(x_{l jt}-1)M, ~\forall l \in \mathbf{N}, j \in \mathbf{M}^{\rm c}_{l}, t \in \mathbf{T}_{l j}, k \in [K]\\
1881%%&& \alpha^{k}_{ij}\le 0, \beta^{k}_{ij}\ge 0, \lambda_{{ij}} \ge 0, \tau \in \Re, ~\forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
1882%%&&q_{l jk}\le 0, y_{l jk} \le 0, ~\forall {l \in \mathbf{N}}, j\in \mathbf{M}_{l}, k\in \mathbf{K}_{l j} \\
1883%%&& \phi^{l }^{k}_{ij} \ge 0, \varphi^{l }^{k}_{ij} \le 0, \pi^{l }^{k}_{ij}\ge 0,\varpi^{l }^{k}_{ij}\ge 0, ~\forall {l \in \mathbf{N}}, i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
1884%%&& \theta^{k}_{ij}\ge 0, \gamma^{k}_{ij}\ge 0, \varsigma^{k}_{ij}\ge 0, \vartheta^{k}_{ij} \le 0, ~\forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
1885%&& \y^{\rm a}, \y^{\rm c} \le \mathbf{0}, \x\in \{0,1\}^{H}, \label{HP1-ambiguity-MILP-FL2-4-2}
1886%\end{eqnarray}
1887%where $\halpha, \hbeta, \hlambda, \htau, \q, \s, \hpsi, \hphi, \hvarphi, \hpi, \hvarpi, \hrho, \hvarrho, \hvarsigma, \hgamma, \htheta, \y^{\rm a}$ and $\y^{\rm c}$ are auxiliary variables and $M$ is a sufficiently small negative number.
1888%\end{proposition}
1889%{\bf Proof. }
1890%In the proof of Proposition~\ref{P-proposition1b-2}, the feasible set $\mathcal{Z}_k\cap \overline{\mathcal{Z}}_{l }(\x)$ of the minimization problem
1891%$$
1892%\min\limits_{\bm{z}} \displaystyle \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{t\in \mathbf{T}_{ij}} \left[z_{ijt}\left(\alpha^{k}_{ijt}+ \beta^{k}_{ijt} \right) + \left|\frac{ z_{ijt}-\nu^{k}_{ij}}{|\mathbf{T}_{ij} |\sigma^{k}_{ij}} \right| \lambda^{k}_{ij}\right]
1893%$$
1894%is bounded. Assuming that it is nonempty, its lifted equivalent form of the inner minimization problem in (\ref{H-system2}) is also bounded and nonempty. Therefore, the dual variables $q^{lk}_{jt}$ and $s^{lk}$ are also bounded. Hence, we can linearize the bilinear terms $q^{lk}_{jt}x_{l jt}$ and $s^{lk}x_{l jt}$ by introducing new variables $y^{{\rm a}lk}_{jt}$ and $y^{{\rm c}lk}_{jt}$, such that
1895%\begin{equation}\label{equ:linear1}
1896%q^{lk}_{jt}\le y^{{\rm a}lk}_{jt}\le 0,~y^{{\rm a}lk}_{jt} \ge M x_{l jt},~y^{{\rm a}lk}_{jt} \le q^{lk}_{jt}+(x_{l jt}-1)M,
1897%\end{equation}
1898%and
1899%\begin{equation}\label{equ:linear2}
1900%s^{lk}\le y^{{\rm c}lk}_{jt}\le 0,~y^{{\rm c}lk}_{jt} \ge M x_{l jt},~y^{{\rm c}lk}_{jt} \le s^{lk}+(x_{l jt}-1)M,
1901%\end{equation}
1902%respectively, where $M$ is a sufficiently small negative number (in numerical computation, $M$ can be set to a negative number with a very large absolute value). Using this linearization technique, we arrive at the above formulation of the mixed integer linear program (MILP) for the distributionally robust redundancy allocation problem. \blot\\
1903% Note that in the linearized MIP formulation~\eqref{HP1-ambiguity-MILP-FL1-4-2}-\eqref{HP1-ambiguity-MILP-FL2-4-2}, the integer variables are still $x_{ijt}$, the original redundancy allocation decision variables, therefore the problem remains tractable.
1904%
1905%\section{A baseline probabilistic reliability model}\label{EC4}
1906%For comparison, we consider the following probabilistic reliability model, which is an MIP with binaries (Feizollahi and Modarres~2012, Wang et al.~2019):
1907%\begin{eqnarray}
1908%\begin{array}{rcll}
1909%& \min\limits_{\x} & \displaystyle \sum\limits_{i\in \mathbf{N}} \left[L_{i}+\sum_{k=0}^{U_{i}-L_{i}}kx_{ik}\right]c_{i} \\[0.3 cm]
1910%& {\rm s.t.} & \displaystyle \sum_{i \in \mathbf{N}} \sum_{k=0}^{U_{i}-L_{i}}x_{ik}\ln\left[1-r_{i}^{L_{i}+k} \right]\ge \ln R_0 & \\[0.3 cm]
1911%&& \displaystyle \sum_{k=0}^{U_{i}-L_{i}}x_{ik}=1, & \forall i \in \mathbf{N}\\
1912%&& x_{ik} \in \{0,1\}, & \forall i \in \mathbf{N}, k \in [0; U_{i}-L_{i}],
1913%\end{array}
1914%\end{eqnarray}
1915%which can be solved by off-the-shelf MIP solvers. Note that the above linear MIP transformation holds only for the regular series-parallel redundant systems with a single type of component; as for the situation that involves multiple types ({\it i.e.,} $|\mathbf{M}_i|>1$ for some $i \in \mathbf{N}$) or the cold-standby subsystems are considered, the probabilistic model, in general, becomes intractable.
1916%
1917%\iffalse
1918%
1919%Furthermore, to enhance the scalability, we can harness both the structures of formulation (\ref{HP1-ambiguity-LP-FL})-(\ref{HP2-ambiguity-LP-FL}) and linearization \eqref{equ:linear1}-\eqref{equ:linear2} and design a Benders decomposition styled algorithm to solve the overall problem \eqref{HP1-ambiguity-X} iteratively. We briefly describe the procedures as follows.
1920%
1921%
1922%To ease the exposition, we denote by
1923%$$
1924%c(\x):=\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \left[\sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\right]c_{ij},
1925%$$
1926%and
1927%$$
1928%R(\x):=\inf\limits_{\P \in \mathbb{F}}\P\left[\min\limits_{i \in \mathbf{N}}\left(\sum_{j \in \mathbf{M}^{\rm c}_i}\sum_{k\in \mathbf{K}_{ij}} \tilde{z}^{k}_{ij}x^{k}_{ij} \bigvee \max_{j \in \mathbf{M}^{\rm a}_i} \max_{k \in \mathbf{K}_{ij}}\tilde{z}^{k}_{ij}x^{k}_{ij}\right)> \mathcal{T}_R \right].
1929%$$
1930%If we apply the linearizations \eqref{equ:linear1}-\eqref{equ:linear2} to replace the bilinear terms $x_{l jk}q_{l jk}$ and $x_{l jk}p_{l}$ in the formulation (\ref{HP1-ambiguity-LP-FL})-(\ref{HP2-ambiguity-LP-FL}) of the worst-case probabilistic chance function (\ref{Prob-1}), we can re-express the chance function (\ref{Prob-1}) given $\x$ as the following compact form:
1931%$$
1932%R(\x)=\max_{\mathbf{A}\mathbf{d}=\mathbf{B}\mathbf{x}+\mathbf{r},\mathbf{d}\ge \mathbf{0}}\mathbf{d}'\mathbf{b},
1933%$$
1934%where $\mathbf{A}\mathbf{d}=\mathbf{B}\mathbf{x}+\mathbf{r},\mathbf{d}\ge \mathbf{0}$ represents the constraints in the problem (\ref{HP1-ambiguity-LP-FL})-(\ref{HP2-ambiguity-LP-FL}) after the linearization in a standardized form of linear program with $\mathbf{d}$ being the standardized decision variables, and $\mathbf{A},\mathbf{B}$ and $\b,\mathbf{r}$ being the matrix and vector coefficients or inputs with conformable dimensions. Then by the duality argument, we can rewrite $R(\x)$ as
1935%\begin{equation}\label{algo-1}
1936%R(\x)=\min_{\mathbf{A}'\mathbf{g}\ge \mathbf{b}}\x'(\mathbf{B}'\mathbf{g})+ \mathbf{r}'\mathbf{g},
1937%\end{equation}
1938%where $\mathbf{g}$ are the dual variables. Hence the {\em relaxed master problem} of overall problem \eqref{HP1-ambiguity-X} can be expressed as
1939%\begin{equation}\label{algo-master}
1940%\begin{array}{rcll}
1941%& \min\limits_{\x} & c(\x) \\[0.3 cm]
1942%& {\rm s.t.} & \x'(\mathbf{B}'\mathbf{g})+ \mathbf{r}'\mathbf{g} \ge R_0,& \forall \mathbf{g} \in \mathcal{G}\\
1943%&& L_{i} \le \sum\limits_{j\in \mathbf{M}_i}\sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\le U_{i}, &\forall i \in \mathbf{N} \\
1944%&& \x\in \{0,1\}^K,
1945%\end{array}
1946%\end{equation}
1947%where $\mathcal{G}\subset \mathcal{V}$ with $\mathcal{V}$ being the set of vertices of polyhedron $\{\g: \mathbf{A}'\mathbf{g}\ge \mathbf{b}\}$. In particular, we have the following optimality condition:
1948%\begin{proposition}\label{optimality}
1949%Let $\x$ be a solution of the relaxed master problem \eqref{algo-master}. If $R(\x) \ge R_0$, then it is the optimal solution of \eqref{HP1-ambiguity-X}.
1950%\end{proposition}
1951%\begin{proof}
1952%Let $c^*$ be the optimal cost of overall problem \eqref{HP1-ambiguity-X}. Since $\x$ is a solution of relaxed master problem \eqref{algo-master}, we have $c(\x) \le c^*$. Now if $R(\x) \ge R_0$, $\x$ is also a feasible solution of \eqref{HP1-ambiguity-X}, it is then optimal.
1953%\end{proof}
1954%
1955%In our algorithm, for a given design $\x$, the linear program \eqref{algo-1} is used to establish the separation problem and optimality condition, where its solution $\mathbf{g}^*$ is used to generate cuts. Since $|\mathcal{V}|$ is finite, the algorithm can achieve the optimality in finite steps. The procedure of the algorithm is summarized as follows.
1956%
1957%\noindent\rule{\textwidth}{0.1em}\vspace{-5pt}\\
1958%\noindent {\bf A Benders Decomposition Algorithm for \eqref{HP1-ambiguity-X}.} \vspace{-10pt}\\
1959%\noindent\rule{\textwidth}{0.05em} \\
1960%{\bf Initialization:} $\mathcal{G}=\emptyset$.
1961%
1962%\begin{enumerate}
1963%\item Solve the relaxed master problem \eqref{algo-master}, and obtain solution $\x$.
1964%\item Compute $R(\x)$ with linear program \eqref{algo-1}:
1965%\begin{enumerate}
1966% \item If $R(\x) \ge R_0$, then STOP and let $\x^*=\x$.
1967% \item Otherwise, obtain solution $\mathbf{g}^*$ and update $\mathcal{G}=\mathcal{G} \cup \{\mathbf{g}^*\}$. Go to STEP 1.
1968%\end{enumerate}
1969%\end{enumerate}
1970%\noindent{\bf Output:} The optimal redundancy design $\x^*$.
1971%
1972%\vspace{-5pt}
1973%\noindent\rule{\textwidth}{0.1em}\vspace{-17pt}\\
1974%%decision vector $\mathbf{d}$ represents the variables $\halpha, \hbeta, \hlambda, \tau, \q, \s, \y^{\rm a}, \y^{\rm c}, \hphi, \hvarphi, \hpi, \hvarpi, \htheta, \hvartheta, \hvarsigma$ and $\hvartheta$ of the, $\r$ is the input vector with entries of $1, 0$ and $M$.
1975%
1976%
1977%%\begin{remark}\color{blue}
1978%%If we impose the independence of the lifetimes, then the problem becomes
1979%%\begin{eqnarray*}
1980%%&&\max_{\x}\P\left[\min\limits_{i \in \mathbf{N}} \left(\max\limits_{j \in \mathbf{M}_i} \max\limits_{k \in \mathbf{K}_{ij}} \tilde{z}^{k}_{ij} x^{k}_{ij}\right)> \mathcal{T}_R \right]\\
1981%%&=&\max_{\x}\prod_{i \in \mathbf{N}}\left[1-\prod_{j \in \mathbf{M}_i}\prod_{k \in \mathbf{K}_{ij}} \P\Big[\tilde{z}^{k}_{ij} x^{k}_{ij}\le \mathcal{T}_R \Big]\right]
1982%%\end{eqnarray*}
1983%%which is non-convex with respect to $\P\Big[\tilde{z}^{k}_{ij} x^{k}_{ij}\le \mathcal{T}_R \Big]$.
1984%%\end{remark}
1985%
1986%
1987%
1988%
1989%
1990%
1991%
1992%
1993%
1994%
1995%
1996%
1997%
1998%\section{Extreme Lifetime Distributions}\label{sec:ELD}
1999%In this section, we develop another model that can recover efficiently the extreme joint probability distribution of component lifetimes $\bm{\tilde{z}}$ that achieves the worst-case probability level (\ref{Prob-1}) for a given system design $\x$.
2000%
2001%\begin{proposition}\label{extreme}
2002%Given a system design $\x$, the extreme probability distribution of the component lifetimes $\bm{\tilde{z}}$ can be constructed by the following discrete distribution with $|\mathbf{N}|+1$ scenarios:
2003%\begin{equation}\label{equ:extreme1-1H}
2004%\P^*\!\!\left[\tilde{z}^{k}_{ij}=\frac{b^{*k}_{ij}}{p^*_i}, \forall j \in \mathbf{M}_i, k\in \mathbf{K}_{ij}; \tilde{z}_{ljk}=\frac{d^*_{iljk}}{p^*_i},\forall l\in \mathbf{N}\backslash \{i\}, j \in \mathbf{M}_l, k\in \mathbf{K}_{lj} \right]=p^*_i, \forall i \in \mathbf{N},
2005%\end{equation}
2006%and
2007%\begin{equation}\label{equ:extreme1-2H}
2008%\P^*\!\!\left[\tilde{z}^{k}_{ij}=\frac{s^{*k}_{ij}}{p^*_{|\mathbf{N}|+1}}, \forall i \in \mathbf{N}, j \in \mathbf{M}_i, k\in \mathbf{K}_{ij}\right]=p^*_{|\mathbf{N}|+1},
2009%\end{equation}
2010%where $\b^*, \d^*, \s^*$ and $\p^*$ can be obtained by solving the following linear program:
2011%\begin{equation}\label{equ:extreme2H}\left.
2012% \begin{array}{rcll}
2013%& \max\limits_{\b,\d,\s,\p} & \displaystyle \sum\limits_{i\in \mathbf{N}}p_i\\[0.2 cm]
2014%&{\rm s.t.} & \displaystyle \sum\limits_{i\in \mathbf{N}}p_i + p_{|\mathbf{N}|+1}=1 & \\[0.2 cm]
2015%&&\displaystyle b^{k}_{ij} x^{k}_{ij} \le p_i \mathcal{T}_R, \forall i \in \mathbf{N}, j\in \mathbf{M}^{\rm a}_{i}, k\in \mathbf{K}_{i j} \\[0.2 cm]
2016%&&\displaystyle \sum_{j\in \mathbf{M}^{\rm c}_{i}}\sum_{k\in \mathbf{K}_{i j}}b^{k}_{ij} x^{k}_{ij} \le p_i \mathcal{T}_R, \forall i \in \mathbf{N} \\[0.2 cm]
2017%&&\displaystyle \underline{\mu}_{ij}\le b^{k}_{ij}+\sum_{l\in \mathbf{N}\backslash \{i\}}d_{lijk} + s^{k}_{ij} \le \overline{\mu}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{i j} \\[0.2 cm]
2018%&&\displaystyle \underline{z}_{ij}p_i\le b^{k}_{ij}\le \overline{{z}}_{ij}p_i, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\[0.3 cm]
2019%&&\displaystyle \underline{z}_{lj}p_i\le d_{iljk}\le \overline{{z}}_{lj}p_i, \forall i \in \mathbf{N}, l\in \mathbf{N}\backslash \{i\}, j\in \mathbf{M}_l, k\in \mathbf{K}_{lj}\\[0.2 cm]
2020%&&\displaystyle \underline{{z}}_{{ij}}p_{|\mathbf{N}|+1} \le s^{k}_{ij}\le \overline{{z}}_{{ij}}p_{|\mathbf{N}|+1}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\[0.2 cm]
2021%%&&\displaystyle \sum_{k \in \mathbf{K}_{ij}}\frac{|b^{k}_{ij}-\nu_{ij}p_i|}{|\mathbf{K}_{ij}||\sigma_{ij}|}+\\
2022%%&&\displaystyle\sum_{l\in \mathbf{N}\backslash \{i\}}\sum_{k \in \mathbf{K}_{ij}}\frac{|d_{lijk}-\nu_{ij}p_l|}{|\mathbf{K}_{ij}||\sigma_{ij}|}+&\\
2023%%&&\displaystyle\sum_{k \in \mathbf{K}_{ij}}\frac{|s^{k}_{ij}-\nu_{ij}p_{|\mathbf{N}|+1}|}{|\mathbf{K}_{ij}||\sigma_{ij}|}\le \epsilon_{ij}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i\\
2024%&&\displaystyle \sum_{k \in \mathbf{K}_{ij}} \xi^{k}_{ij}+\sum_{l\in \mathbf{N}\backslash \{i\}}\sum_{k \in \mathbf{K}_{ij}} \eta_{lijk}+ \sum_{k \in \mathbf{K}_{ij}} \zeta^{k}_{ij}\le \epsilon_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}\\[0.2 cm]
2025%&& \displaystyle b^{k}_{ij}-\nu_{ij}p_i -\xi^{k}_{ij}|\mathbf{K}_{ij}||\sigma_{ij}|\le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{ij} \\[0.2 cm]
2026%&& \displaystyle \nu_{ij}p_i- b^{k}_{ij} -\xi^{k}_{ij}|\mathbf{K}_{ij}||\sigma_{ij}|\le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{ij} \\[0.2 cm]
2027%&& \displaystyle d_{lijk}-\nu_{ij}p_l -\eta_{lijk}|\mathbf{K}_{ij}||\sigma_{ij}|\le 0, \forall i \in \mathbf{N}, l\in \mathbf{N}\backslash \{i\}, j\in \mathbf{M}_{l}, k\in \mathbf{K}_{lj}\\[0.2 cm]
2028%&& \displaystyle \nu_{ij}p_l -d_{lijk} -\eta_{lijk}|\mathbf{K}_{ij}||\sigma_{ij}|\le 0, \forall i \in \mathbf{N}, l\in \mathbf{N}\backslash \{i\}, j\in \mathbf{M}_{l}, k\in \mathbf{K}_{lj}\\[0.2 cm]
2029%&& \displaystyle s^{k}_{ij}-\nu_{ij}p_{|\mathbf{N}|+1}- \zeta^{k}_{ij}|\mathbf{K}_{ij}||\sigma_{ij}|\le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{ij} \\[0.2 cm]
2030%&& \displaystyle \nu_{ij}p_{|\mathbf{N}|+1}-s^{k}_{ij}-\zeta^{k}_{ij}|\mathbf{K}_{ij}||\sigma_{ij}| \le 0, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{ij} \\[0.2 cm]
2031%&& \p\in \mathbb{R}^{|\mathbf{N}|+1}_+, \b \in \mathbb{R}^{K}, \s \in \mathbb{R}^{K}, \d \in \mathbb{R}^{K(|\mathbf{N}|+1)}, \hxi \in \mathbb{R}_+^{K}, \heta \in \mathbb{R}_+^{K(|\mathbf{N}|+1)}, \hzeta \in \mathbb{R}_+^{K},
2032%%&& \p\in \mathbb{R}^{|\mathbf{N}|+1}_+, \b \in \mathbb{R}^{K}, \s \in \mathbb{R}^{K}, \d \in \mathbb{R}^{K(|\mathbf{N}|+1)}.
2033%\end{array}
2034%\right\}
2035%\end{equation}
2036%where $\hxi,\heta$ and $\hzeta$ are auxiliary variables.
2037%\end{proposition}
2038%\begin{proof}
2039%For the given system design $\x$, recall that
2040%\begin{eqnarray*}
2041%\inf\limits_{\P \in \mathbb{F}}\P\Big[\tilde{\bm{z} }\in \mathcal{Z}_i(\x),\forall i \in \mathbf{N} \Big]=\P^*\!\Big[\tilde{\bm{z} }\in \mathcal{Z}_i(\x),\forall i \in \mathbf{N} \Big]
2042%&=&1-\P^*\!\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]\\
2043%&=&1-\sup\limits_{\P \in \mathbb{F}}\P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big].
2044%\end{eqnarray*}
2045%It suffices to prove that the probability distribution $\P^*$ obtained in (\ref{equ:extreme1-1H})-(\ref{equ:extreme1-2H}) is the extreme distribution for
2046%\begin{equation}\label{equ:sup-1H}
2047%\sup\limits_{\P \in \mathbb{F}}\P\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big].
2048%\end{equation}
2049%In fact, by Proposition~\ref{P-proposition1b}, the above supremum (\ref{equ:sup-1H}) is equivalent to the optimal objective of
2050%$$
2051%\min\left\{\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left(\alpha^{k}_{ij}\underline{\mu}_{{ij}}+ \beta^{k}_{ij}\overline{\mu}_{{ij}}\right)+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\epsilon_{ij}\lambda_{ij} + \tau: (\ref{HP1-ambiguity-LP-FL1})-(\ref{HP2-ambiguity-LP-FL}) \right\},
2052%$$
2053%which by duality argument is also equivalent the optimal objective of (\ref{equ:extreme2H}). Now we are left to show the probability distribution (\ref{equ:extreme1-1H})-(\ref{equ:extreme1-2H}) formed by the solution $(\b^*, \d^*, \s^*,\p^*)$ of (\ref{equ:extreme2H}) is a qualified distribution in the ambiguity set (\ref{ambiguity-set}). This can be readily seen from the constraints in (\ref{equ:extreme2H}), by which the probability distribution $\mathbb{P}^*$ identified in (\ref{equ:extreme1-1H})-(\ref{equ:extreme1-2H}) satisfies the following constraints:
2054%$$
2055%\left\{ \begin{array}{rcll}
2056%&& \displaystyle \sum\limits_{i\in \mathbf{N}}p^*_i + p^*_{|\mathbf{N}|+1}=1 & \\[0.3 cm]
2057%%&&\displaystyle \left(\frac{b_{igjk}}{p_i}\right)x^g^{k}_{ij} \le \mathcal{T}_R, \forall i \in \mathbf{N}, g \in \mathbf{G}_i, j\in \mathbf{M}^{\rm a}_{ig}, k\in \mathbf{K}^g_{i j} \\[0.3 cm]
2058%%&&\displaystyle \sum_{j\in \mathbf{M}^{\rm c}_{ig}}\sum_{k\in \mathbf{K}^g_{i j}}\left(\frac{b_{igjk}}{p_i }\right) x^g^{k}_{ij} \le \mathcal{T}_R, \forall i \in \mathbf{N}, g \in \mathbf{G}_i \\[0.3 cm]
2059%&&\displaystyle \underline{\mu}_{ij}\le p^*_i\left[\frac{b^{*k}_{ij}}{p^*_i}\right]+\sum_{l\in \mathbf{N}\backslash \{i\}} p^*_l\left[\frac{d^*_{lijk}}{p^*_l}\right] + p^*_{|\mathbf{N}|+1}\left[\frac{s^{*k}_{ij}}{p^*_{|\mathbf{N}|+1}}\right] \le \overline{\mu}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{i j} \\[0.3 cm]
2060%&&\displaystyle \underline{z}_{ij}\le \frac{b^{*k}_{ij}}{p^*_i}\le \overline{{z}}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{ij} \\[0.3 cm]
2061%&&\displaystyle \underline{z}_{lj} \le \frac{d^*_{iljk}}{p^*_i}\le \overline{{z}}_{lj}, \forall i \in \mathbf{N}, l\in \mathbf{N}\backslash \{i\}, j\in \mathbf{M}_{l}, k\in \mathbf{K}_{lj}\\[0.3 cm]
2062%&&\displaystyle \underline{{z}}_{{ij}} \le \frac{s^{*k}_{ij}}{p^*_{|\mathbf{N}|+1}}\le \overline{{z}}_{{ij}}, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i}, k\in \mathbf{K}_{ij} \\[0.3 cm]
2063%&&\displaystyle p^*_i\left[\sum_{k \in \mathbf{K}_{ij}}\frac{|b^{*k}_{ij}/p^*_i-\nu_{ij}|}{|\mathbf{K}_{ij}||\sigma_{ij}|}\right]+\sum_{l\in \mathbf{N}\backslash \{i\}}p^*_l\left[\sum_{k \in \mathbf{K}_{ij}}\frac{|d^*_{lijk}/p^*_l-\nu_{ij}|}{|\mathbf{K}_{ij}||\sigma_{ij}|}\right]+&\\
2064%&&\displaystyle p^*_{|\mathbf{N}|+1}\left[\sum_{k \in \mathbf{K}_{ij}}\frac{|s^{*k}_{ij}/p^*_{|\mathbf{N}|+1}-\nu_{ij}|}{|\mathbf{K}_{ij}||\sigma_{ij}|}\right]\le \epsilon_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_{i},
2065%\end{array}\right\}
2066%$$
2067%which is exactly a qualified distribution in the ambiguity set $\mathbb{F}$ in (\ref{ambiguity-set}).
2068%% of $\P^*\!\Big[\tilde{\bm{z} }\in \mathcal{Z}_i(\x),\forall i \in \mathbf{N} \Big]$, by the duality argument, we can have the extreme probability level of the complementary event
2069%%$$
2070%%\P^*\!\Big[\cup_{i\in \mathbf{N} }\{\bm{\tilde{z}} \in \overline{\mathcal{Z}}_i(\x)\} \Big]=\P^*\!\left[\bigcup_{i\in \mathbf{N} }\Big\{z^{k}_{ij}x^{k}_{ij}\le \mathcal{T}_R, \forall j \in \mathbf{M}_i, k\in \mathbf{K}_{ij}\Big\} \right]
2071%%$$
2072%%solves the optimization problem (\ref{equ:extreme2}). It can be seen that the problem (\ref{equ:extreme2}) solves exactly for the discrete probability distribution of $\bm{\tilde{z}}$ in (\ref{equ:extreme1-1})-(\ref{equ:extreme1-2}) with $|\mathbf{N}|+1$ scenarios that achieves the highest (extreme) probability level of event
2073%%$$
2074%%\bigcup_{i\in \mathbf{N} }\Big\{z^{k}_{ij}x^{k}_{ij}\le \mathcal{T}_R, \forall j \in \mathbf{M}_i, k\in \mathbf{K}_{ij}\Big\}
2075%%$$
2076%%and satisfies the distributional requirement in the ambiguity set (\ref{ambiguity-set}).
2077%\end{proof}
2078%
2079%
2080%
2081%\section{Computational Study}\label{CS3}
2082%
2083%In this section, we demonstrate the performance of the developed models by performing extensive numerical experiments and a case study. The computational study consists of four parts: (i) Evaluating the worst-case probability level, i.e., model (\ref{HP1-ambiguity-LP-FL})-(\ref{HP2-ambiguity-LP-FL}), for a given system design under different parameter settings. (ii) Robust reliability optimization using model (\ref{HP1-ambiguity-MILP-FL1})-(\ref{HP1-ambiguity-MILP-FL2}). (iii) Performance comparison with the probabilistic RAP model using out-of-sample testing. (iv) A real-life case of a hybrid cold-standby and parallel high-speed train system.
2084%
2085%
2086%
2087%All experiments were run on a PC with an Intel Core(TM) i7-4510U processor at 2.60~GHz. All MIP models were coded in Python and solved using the callback library of Mosek version 8.1.0.34.
2088%
2089%
2090%
2091%
2092%
2093%
2094%
2095%
2096%\subsection{Computing the reliability level}\label{sec:comp1}
2097%In the parts (i) and (ii) of numerical experiments (Sections~\ref{sec:comp1} and~\ref{sec:comp2}), we consider the following setting: a series-parallel system with 5 subsystems ($|\mathbf{N}|=5$), and each subsystem contains 3 types of components ($|\mathbf{M}_i|\equiv|\mathbf{M}|= 3, \forall i \in \mathbf{N}$) where each type is of 3 redundant components ($|\mathbf{K}_{ij}|\equiv 3, \forall i \in \mathbf{N}, j\in \mathbf{M}_i$). Furthermore, we set the distributional parameters of component lifetimes $([\underline{\hmu}, \overline{\hmu}], [\underline{\bm{z}}, \overline{\bm{z}}], \bm{\hsigma}, \hepsilon)$ in the ambiguity set $\mathbb{F}$ as follows.
2098%
2099%We utilize parameters $\chi \ge 0$ and $t \ge 0$ to adjust the gap level of $[\underline{\mu}_{ij}, \overline{\mu}_{ij}]$ and $[\underline{z}_{ij}, \overline{z}_{ij}]$, respectively, as follows
2100%$$
2101%\underline{\mu}_{ij}=\underline{\mu}^o_{ij}-\chi(\overline{\mu}^o_{ij}-\underline{\mu}^o_{ij}),\quad \overline{\mu}_{ij}=\overline{\mu}^o_{ij}+\chi(\overline{\mu}^o_{ij}-\underline{\mu}^o_{ij})
2102%$$
2103%where $\underline{\mu}^o_{ij} \in \mathbf{U}[10,13], \overline{\mu}^o_{ij} \in \mathbf{U}[15,18], \forall i \in [5], j \in [3]$, and
2104%$$
2105%\underline{z}_{ij}=\underline{z}^o_{ij}-t(\overline{z}^o_{ij}-\underline{z}^o_{ij}),\quad \overline{z}_{ij}=\overline{z}^o_{ij}+t(\overline{z}^o_{ij}-\underline{z}^o_{ij}),
2106%$$
2107%where $\underline{z}^o_{ij} \in \mathbf{U}[5,10], \overline{z}^o_{ij} \in \mathbf{U}[30,50], \forall i \in [5], j \in [3]$. Also we let
2108%$$\sigma_{ij}=\kappa \sigma^o_{ij}, \quad \epsilon_{ij}=\theta\epsilon^o_{ij},$$
2109%where $\sigma^o_{ij} \in \mathbf{U}[4,5], \epsilon^o_{ij} \in \mathbf{U}[0, 0.5], \forall i\in [5], j\in [3]$ and $\kappa, \theta \ge 0$ are the adjustable parameters. Therefore, by changing the values of $(\chi,t,\kappa,\theta)$, we can have different sets of parameters $([\underline{\hmu}, \overline{\hmu}], [\underline{\bm{z}}, \overline{\bm{z}}], \bm{\hsigma}, \hepsilon)$. Specifically, large values of $(\chi,t)$ correspond to the large gaps of $[\underline{\mu}_{ij}, \overline{\mu}_{ij}]$ and $[\underline{z}_{ij}, \overline{z}_{ij}], \forall i \in [5], j \in [3]$, while large values of $(\kappa,\theta)$ correspond to large values of $\sigma_{ij}$ and $\epsilon_{ij}, \forall i \in [5], j \in [3].$ Finally, we set the system lifetime requirement $\mathcal{T}_R \in [16, 19]$.
2110%
2111%
2112%In this first part of numerical experiments, we fix a system design $\x$, such that
2113%$$
2114%\left[\sum_{k \in \mathbf{K}_{ij}}x^{k}_{ij}\right]_{|\mathbf{N}||\mathbf{M}|}=\left[
2115%\begin{matrix}
2116%1 & 1 & 1 \\
2117%1 & 1 & 1 \\
2118%2 & 1 & 1 \\
2119%1 & 1 & 1 \\
2120%1 & 2 & 2 \\
2121%\end{matrix}
2122%\right],
2123%$$
2124%where each subsystem (out of five) is assigned with one or two components for each type (out of three). Using model (\ref{HP1-ambiguity-LP-FL})-(\ref{HP2-ambiguity-LP-FL}), we compute the worst-case probability level (\ref{Prob-1}) with different settings of parameters $([\underline{\hmu}, \overline{\hmu}], [\underline{\bm{z}}, \overline{\bm{z}}], \bm{\hsigma}, \hepsilon)$ in the ambiguity set $\mathbb{F}$ and the parameter $\mathcal{T}_R$ of system lifetime requirement. The worst-case probability levels under different testing purposes are provided in the Figure~\ref{fig:exp-1}.
2125%
2126%
2127%
2128%
2129%\begin{figure}[h!]
2130% \centering
2131% \includegraphics[width=1\linewidth]{exp-set-1}
2132% \caption{\footnotesize The worst-case probability (reliability) level of a given system design with different settings of distributional parameters $([\underline{\hmu}, \overline{\hmu}], [\underline{\bm{z}}, \overline{\bm{z}}], \bm{\hsigma}, \hepsilon)$ and system lifetime requirement level $\mathcal{T}_R$. (a) The probability level vs. gap of $[\underline{\hmu}, \overline{\hmu}]$ under different $\mathcal{T}_R$ levels, where the gap enlarges as the parameter $\chi$ (`chi') increases. (b) The probability level vs. gap of $[\underline{\bm{z}}, \overline{\bm{z}}]$ under different $\mathcal{T}_R$ levels, where the gap enlarges as the parameter $t$ (`psi') increases. (c) The probability level vs. values of $\hsigma$ under different $\mathcal{T}_R$ levels, where the values of $\hsigma$ increase as the parameter $\kappa$ (`kappa') increases. (d) The probability level vs. values of $\hepsilon$ under different $\mathcal{T}_R$ levels, where the values of $\hepsilon$ increase as the parameter $\theta$ (`theta') increases.}
2133% \label{fig:exp-1}
2134%\end{figure}
2135%
2136%The observations of the experimental results are the following:
2137%(i) the worst-case probability (reliability) level decreases when the variation range $[\underline{\hmu}, \overline{\hmu}]$ of expected lifetimes increases (Figure~\ref{fig:exp-1}-(a)), the variation range $[\underline{\bm{z}}, \overline{\bm{z}}]$ of the lifetimes increases (Figure~\ref{fig:exp-1}-(b)), or the difference in average among the lifetimes of the same type of components enlarges (Figure~\ref{fig:exp-1}-(c) and (d)). Such a decreased probability is due to the enlarged ambiguity set $\mathbb{F}$ resulting from the changes in the above distributional parameters $([\underline{\hmu}, \overline{\hmu}], [\underline{\bm{z}}, \overline{\bm{z}}], \bm{\hsigma}, \hepsilon)$. (ii) Intuitively, the probability level also decreases if the system lifetime requirement $\mathcal{T}_R$ is increased.
2138%(iii) The results in Figure~\ref{fig:exp-1} also provide the variation limits for the distributional parameters for an aspirational probability level with the given system lifetime requirement $\mathcal{T}_R$. For instance, if the decision maker needs the (worst-case) probability higher than 0.8 for system lifetime above $\mathcal{T}_R=16.2$, then the components lifetime range $[\underline{\bm{z}}, \overline{\bm{z}}]$ cannot be larger than the case such that parameter $t \ge 0.01$.
2139
2140
2141%\subsubsection{Experiment on $\underline{z}$ and $\overline{z}$}
2142%In this subsection, we do experiments with different dispersion levels between the parameters $\bm{\underline{z}}$ and $\bm{\overline{z}}$. For simplicity, we also introduce an auxiliary variable $t\in[1,+\infty)$, and control the values of the specified parameters as follows:
2143%\begin{eqnarray}
2144%\widetilde{\underline{z}}^{k}_{ij}' = \frac{1}{t} \cdot \widetilde{\underline{z}}^{k}_{ij}&&\forall i\in I, j_i\in J_i, k_{ij}\in N_{ij}
2145%\end{eqnarray}
2146%\begin{eqnarray}
2147%\widetilde{\overline{z}}^{k}_{ij}' = t \cdot \widetilde{\overline{z}}^{k}_{ij}&&\forall i\in I, j_i\in J_i, k_{ij}\in N_{ij}
2148%\end{eqnarray}
2149%where $\widetilde{\underline{z}}^{k}_{ij}'$ and $\widetilde{\overline{z}}^{k}_{ij}'$ are the experiment observation values of the parameters $\underline{z}_{ij}$ and $\overline{z}_{ij}$, so the greater the value of $t$, the greater the dispersion level between $\underline{z}_{ij}$ and $\overline{z}_{ij}$. The comparison is shown in Figure~\ref{fig:xi}.
2150%
2151%\begin{figure}[h!]
2152% \centering
2153% \includegraphics[width=0.5\linewidth]{xi}
2154% \caption[Obj value under $t$]{The objective values under different values of $t\in[1,2]$ for five $\mathcal{T}_R$ value cases ($\mathcal{T}_R\in\{15, 20, 25, 30, 35\}$); in each case, the result shows that the greater the dispersion level between $\bm{\underline{z}}$ and $\bm{\overline{z}}$, the smaller the objective value is.}
2155% \label{fig:xi}
2156%\end{figure}
2157%
2158%\subsubsection{Experiment on $\mathcal{T}_R$}
2159%In this subsection, we do experiments with different $\mathcal{T}_R$ in the candidate set $\{0, 5, 10, \dots, 80, 85, 90\}$; the result is shown in Figure~\ref{fig:dl}.
2160%
2161%\begin{figure}[h!]
2162% \centering
2163% \includegraphics[width=0.5\linewidth]{Dl}
2164% \caption[Obj value under $\mathcal{T}_R$]{The objective values under different values of the parameter $\mathcal{T}_R$; as the value of $\mathcal{T}_R$ increases from 0 to 90, the objective value decreases from 1 to 0.}
2165% \label{fig:dl}
2166%\end{figure}
2167%
2168%\subsubsection{Experiment on $\hsigma$}
2169%In this subsection, we do experiments with different value of $\bm{\sigma}$. For simplicity of experiments, we introduce an
2170%auxiliary variable $\kappa\in(0,+\infty)$ to control the value of $\sigma_{ij}$, $\forall i\in I$, $j_i\in J_i$:
2171%\begin{eqnarray}
2172%\widetilde{\sigma}_{ij}' = \kappa \cdot \widetilde{\sigma}_{ij}&&\forall i\in I, j_i\in J_i, k_{ij}\in N_{ij}
2173%\end{eqnarray}
2174%where $\widetilde{\sigma}_{ij}'$ is the experiment observation value of the parameter $\sigma_{ij}$ and $\widetilde{\sigma}_{ij}$ is the initial setting value of $\sigma_{ij}$.
2175%The result is shown in Figure~\ref{fig:sigma}.
2176%\begin{figure}[h!]
2177% \centering
2178% \includegraphics[width=0.5\linewidth]{sigma}
2179% \caption[Obj value under $\bm{\sigma}$]{The objective values under different values of $\kappa\in[0,2.5]$ for nine $\mathcal{T}_R$ value cases ($\mathcal{T}_R\in\{10, 20, 30, 40, 50, 60, 70, 80, 90\}$); in each case, the result shows that a greater value of $\bm{\sigma}$ corresponds to a smaller objective value. All objective values will be 1 if $\mathcal{T}_R<10$.}
2180% \label{fig:sigma}
2181%\end{figure}
2182%
2183%\subsubsection{Experiment on $\epsilon$}
2184%In this section, we do experiments with different values of parameter $\bm{\epsilon}$. Here we set
2185%$\epsilon_{ij}\in\{0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5\}$, $\forall i\in I$, $j_i\in J_i$ for simplicity and clarity. The results are shown in Figure~\ref{fig:epsilon}.
2186%\begin{figure}[h!]
2187% \centering
2188% \includegraphics[width=0.5\linewidth]{epsilon}
2189% \caption[Obj value under $\bm{\epsilon}$]{The objective values under different values of $\epsilon_{ij}$, $\forall i\in I$, $j_i\in J_i$ for nine $\mathcal{T}_R$ value cases ($\mathcal{T}_R\in\{10, 20, 30, 40, 50, 60, 70, 80, 90\}$); in each case, the result shows that a greater value of $\epsilon_{ij}$ corresponds to a smaller objective value.}
2190% \label{fig:epsilon}
2191%\end{figure}
2192%%
2193%
2194%
2195
2196
2197%\subsection{Reliability Optimization}\label{sec:comp2}
2198%Next we perform the robust redundancy optimization using model (\ref{HP1-ambiguity-MILP-FL1})-(\ref{HP1-ambiguity-MILP-FL2}) to find the optimal system design $\x^*$ with different combinations $(\mathcal{T}_R,R_0)$ of system lifetime requirement level and reliability requirement level. Specifically, we set the minimum number of components of each type as $L_{ij}=1, \forall i \in [5], j \in [3]$, and allow the $\mathcal{T}_R$ to vary from 16.5 to 19.0, and $R_0$ from 0.1 to 0.9. The optimal (minimal) cost is plotted in Figure~\ref{fig:exp-2}.
2199%\begin{figure}[h!]
2200% \centering
2201% \includegraphics[width=0.6\linewidth]{Figure_R0}
2202% \caption{\footnotesize Optimal cost with different system lifetime requirement $\mathcal{T}_R$ and reliability requirement $R_0$, where the vertical line implies the case of infeasibility.}
2203% \label{fig:exp-2}
2204%\end{figure}
2205%
2206%From the results, the optimal cost and the number of components increase as $\mathcal{T}_R$ or $R_0$ increases. In particular, Figure~\ref{fig:exp-2} implies that, for each system lifetime requirement $\mathcal{T}_R$, there are several threshold levels of the system reliability requirement $R_0$ beyond which no system design would be feasible. For instance, for $\mathcal{T}_R=18$, there is no feasible system design for the reliability level $R_0>0.7$, while when $\mathcal{T}_R$ is raised to $18.5$, no feasible system design can be found even for the reliability level $R_0>0.2$. In other words, Figure~\ref{fig:exp-2} provides the physical limits of both the lifetime and reliability requirements for the system to meet, under the lifetime distributional ambiguity of components.
2207%
2208%%On the other hand, the Table~\ref{table-design} provides with the system design information (and also the cost information) with different combinations of $(\mathcal{T}_R, R_0)$. The designer can then choose the most suitable system designs with these information.
2209%%\begin{table}[h]\scriptsize%\tiny%\footnotesize
2210%%\begin{center}
2211%% \caption{\footnotesize The optimal system redundancy design $\x^*$: the total number of components (of all types) in each subsystem along with different system lifetime requirement $\mathcal{T}_R$ and reliability requirement $R_0$, where `$-$' indicates a no-solution case due to the overly high $\mathcal{T}_R$ and/or $R_0$. }\label{table-design}
2212%% \begin{tabular}{|c|c||cccccccc c|}
2213%% \hline
2214%% % \rule{0pt}{12pt} & && & & & $R_0$ & & & & \\[0pt] \hline
2215%% \multirow{2}{*}{\scriptsize System Lifetime $\mathcal{T}_R$} & \multirow{2}{*}{\scriptsize Subsystem $i$} & \multicolumn{9}{c|}{$R_0$}\\
2216%%\cline{3-11}
2217%% && 0.1 & 0.2 & 0.3 & 0.4 & 0.5 & 0.6 & 0.7 & 0.8 & 0.9 \\[0pt]
2218%%\hline
2219%% % DL = 16.5
2220%% &1& 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 & 4 \\[0pt]
2221%% &2& 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 \\[0pt]
2222%% $16.5$ &3& 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 \\[0pt]
2223%% &4& 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 \\[0pt]
2224%% &5& 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 \\
2225%% \hline
2226%% % \multicolumn{2}{|c|}{Cost} &&&&&&&&--&-- \\ \hline
2227%% % DL = 17.0
2228%% &1 & 3 & 3 & 4 & 3 & 4 & 5 & 4 & \multirow{5}{*}{--} & \multirow{5}{*}{--} \\[0pt]
2229%% &2 & 3 & 3 & 3 & 4 & 4 & 4 & 5 & & \\[0pt]
2230%% $17.0$ &3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 & & \\[0pt]
2231%% &4 & 3 & 3 & 3 & 3 & 3 & 3 & 3 & & \\[0pt]
2232%% &5 & 3 & 3 & 3 & 3 & 3 & 3 & 4 & & \\
2233%% \hline
2234%% %\multicolumn{2}{|c|}{Cost} &&&&&&&&--&-- \\ \hline
2235%% &1& 3 & 3 & 4 & 4 & 4 & 4 & 5 & \multirow{5}{*}{--} & \multirow{5}{*}{--} \\[0pt]
2236%% &2& 3 & 3 & 3 & 3 & 4 & 4 & 5 & & \\[0pt]
2237%% $17.5$ &3& 3 & 3 & 3 & 3 & 3 & 3 & 3 & & \\[0pt]
2238%% &4& 3 & 3 & 3 & 3 & 3 & 3 & 3 & & \\[0pt]
2239%% &5& 3 & 3 & 3 & 3 & 3 & 4 & 4 & & \\
2240%% \hline
2241%% %\multicolumn{2}{|c|}{Cost} &&&&&&&&--&-- \\ \hline
2242%% &1& 3 & 4 & 3 & 4 & 5 & 5 & 5 & \multirow{5}{*}{--} & \multirow{5}{*}{--} \\[0pt]
2243%% &2& 3 & 3 & 4 & 4 & 4 & 4 & 5 & & \\[0pt]
2244%% $18.0$ &3& 3 & 3 & 3 & 3 & 3 & 3 & 3 & & \\[0pt]
2245%% &4& 3 & 3 & 3 & 3 & 3 & 3 & 3 & & \\[0pt]
2246%% &5& 3 & 3 & 3 & 3 & 3 & 4 & 5 & & \\
2247%% \hline
2248%% %\multicolumn{2}{|c|}{Cost} &&&&&&&&--&-- \\ \hline
2249%% &1& 4 & 5 & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} \\[0pt]
2250%% &2& 5 & 5 & & & & & & & \\[0pt]
2251%% $18.5$ &3& 5 & 5 & & & & & & & \\[0pt]
2252%% &4& 4 & 5 & & & & & & & \\[0pt]
2253%% &5& 4 & 5 & & & & & & & \\
2254%% \hline
2255%%%\multicolumn{2}{|c|}{Cost} &&&&&&&&--&-- \\ \hline
2256%% &1& 5 & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} & \multirow{5}{*}{--} \\[0pt]
2257%% &2& 5 & & & & & & & & \\[0pt]
2258%% $19.0$ &3& 5 & & & & & & & & \\[0pt]
2259%% &4& 5 & & & & & & & & \\[0pt]
2260%% &5& 5 & & & & & & & & \\
2261%% \hline
2262%%%\multicolumn{2}{|c|}{Cost} &&&&&&&&--&-- \\ \hline
2263%% \end{tabular}
2264%%\end{center}
2265%%\end{table}
2266%
2267%%\subsubsection{Performance comparison with probabilistic reliability model}
2268%%\begin{equation}\label{RO-Model}
2269%%\begin{array}{rcll}
2270%%& \min\limits_{\x} & \sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[ \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\right]c_{ij} \\[0.3 cm]
2271%%&{\rm s.t.} & \min\limits_{i \in \mathbf{N}} \left(\max\limits_{j \in \mathbf{M}_i} \max\limits_{k \in \mathbf{K}_{ij}} z^{k}_{ij} x^{k}_{ij}\right)\ge \mathcal{T}_R & \forall \bm{z} \in \mathcal{U}(\Gamma) \\[0.3 cm]
2272%%&& \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\ge L_{ij}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i\\
2273%%&& x^{k}_{ij} \in \{0,1\}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k \in \mathbf{K}_{ij}.
2274%%\end{array}
2275%%\end{equation}
2276%%The above budgeted robust convex (nonlinear) optimization model is in general intractable even when uncertainty set $\mathcal{U}(\Gamma)$ is a general polytope, by noting that the robust counterpart of convex (piecewise linear) constraints
2277%%$$
2278%%\max\limits_{j \in \mathbf{M}_i} \max\limits_{k \in \mathbf{K}_{ij}} z^{k}_{ij} x^{k}_{ij} \ge \mathcal{T}_R, \forall i \in \mathbf{N}
2279%%$$
2280%%cannot be tractably dualized. Nevertheless, under the independence condition of component lifetimes $\tilde{z}^{k}_{ij}, \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k \in \mathbf{K}_{ij}$, if we define
2281%%$$
2282%%\mathcal{U}(\gamma):=\left\{\tilde{z}^{k}_{ij}\in \Big[\underline{{z}}_{ij}(\gamma), \overline{{z}}_{ij}(\gamma) \Big], \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k \in \mathbf{K}_{ij} \right\},
2283%%$$
2284%%then the above regular robust reliability model can be equivalently expressed as the following MIP problem:
2285%%\begin{equation}\label{RO-Model}
2286%%\begin{array}{rcll}
2287%%& \min\limits_{\x} & \sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[ \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\right]c_{ij} \\[0.3 cm]
2288%%&{\rm s.t.} & \max\limits_{j \in \mathbf{M}_i} \max\limits_{k \in \mathbf{K}_{ij}} \underline{{z}}_{ij}(\gamma) x^{k}_{ij} \ge \mathcal{T}_R, & \forall i \in \mathbf{N} \\[0.3 cm]
2289%%&& \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\ge L_{ij}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i\\
2290%%&& x^{k}_{ij} \in \{0,1\}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k \in \mathbf{K}_{ij}.
2291%%\end{array}
2292%%\end{equation}
2293%\subsection{Out-of-sample performance comparison with the probabilistic model}\label{sec:test-1}
2294%To further illustrate the performance of our robust reliability model, we compare the design obtained from the proposed robust redundancy optimization model (\ref{HP1-ambiguity-MILP-FL1})-(\ref{HP1-ambiguity-MILP-FL2}), termed {\em robust design} with the design of the probabilistic redundancy optimization model, termed {\em probabilistic design}. As mentioned in the Introduction, when the situation involves multiple types (i.e. $|\mathbf{M}_i|>1$), or both the cold-standby and active parallel redundant subsystems are considered, the probabilistic model in general becomes intractable (as mentioned in the sections of Introduction and Literature Review). Therefore, for a fair comparison, we consider a series-parallel system with $|\mathbf{M}_i|=1, \forall i \in [5]$, which preserves a linear MIP formulation. For a coherent exposition of the experimental study, we place the details of the probabilistic redundancy model as well as its MIP transformation in the Appendix.
2295%
2296%In particular, we first randomly generate lifetime samples (size=100), and then compute the probability levels $\P[\tilde{z}_{i}\le \mathcal{T}_R ], \forall i \in [5]$ and the parameters $([\underline{\hmu}, \overline{\hmu}], [\underline{\bm{z}}, \overline{\bm{z}}], \bm{\hsigma}, \hepsilon)$ from the generated lifetime samples for parameter inputs of probabilistic and robust models, respectively, where $R_0=0.9$ and $\mathcal{T}_R=18,19$ and $20$. We obtain probabilistic and robust designs by solving the perspective redundancy models. Then we perform 1000 sets of out-of-sample experiments where in each set, 1000 lifetime data points are generated using the sample mean and StD of the original lifetime samples. The out-of-sample system lifetimes as well as its mean values of both probabilistic and robust designs are compared and plotted in Figure~\ref{Fig:lifetime-compare1}, and the comparison of out-of-sample reliability levels is provided in Table~\ref{tab-compare2}.
2297%
2298%
2299%
2300%
2301%
2302%
2303%\begin{figure}[h*]
2304% \centering
2305% \subfigure[]{\includegraphics[scale=0.35]{lifetime-1}}\!\!\!\!\!\!\!\!
2306% \subfigure[]{\includegraphics[scale=0.35]{lifetime-2}}\!\!\!\!\!\!\!\!
2307% \subfigure[]{\includegraphics[scale=0.35]{lifetime-3}}\!\!\!\!\!\!\!\!
2308% \subfigure[]{\includegraphics[scale=0.35]{mean-lifetime-1}}\!\!\!\!\!\!\!\!
2309% \subfigure[]{\includegraphics[scale=0.35]{mean-lifetime-2}}\!\!\!\!\!\!\!\!
2310% \subfigure[]{\includegraphics[scale=0.35]{mean-lifetime-3}}\!\!\!\!\!\!\!\!
2311%\caption{\footnotesize (a)-(c) The out-of-sample system lifetime scenarios (in 1 set of experiments with 1000 lifetime data) of probabilistic and robust design under $\mathcal{T}_R=18,19$ and $20$, respectively.
2312%(d)-(f) The (sample) mean values of out-of-sample system lifetimes (in 1000 sets of experiments) of probabilistic and robust design under $\mathcal{T}_R=18,19$ and $20$, respectively.}
2313%\label{Fig:lifetime-compare1}
2314%\end{figure}
2315%
2316%
2317%
2318%\begin{table}[h*]\footnotesize%\small%
2319%\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability levels comparison ($R_0=0.9$), where the `Design' specifies the number of redundant components allocated in each of 5 subsystems, `P-Model' and `C-DRO-Model' refer to the probabilistic model and conditionally robust model, respectively. }
2320%\begin{center}
2321%\begin{tabular}{|c|c|| c| c |c |c |}\hline
2322% $\mathcal{T}_R$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
2323% \hline
2324% \multirow{2}{*}{18}& P-Model & (1,1,1,1,1) & \multirow{2}{*}{$R_0=0.9$}& 0.617 & 0.015 \\
2325% & C-DRO-Model & (2,1,2,2,2) & &0.873 & 0.010 \\
2326%\hline
2327% \multirow{2}{*}{19}& P-Model & (2,1,1,1,1)&\multirow{2}{*}{$R_0=0.9$} & 0.564 & 0.015 \\
2328% & C-DRO-Model & (2,1,2,2,2) && 0.884 & 0.009 \\
2329%\hline
2330% \multirow{2}{*}{20}& P-Model & (2,1,1,3,2) &\multirow{2}{*}{$R_0=0.9$}& 0.554 & 0.016 \\
2331% & C-DRO-Model & (2,2,3,3,2) && 0.909 & 0.009 \\
2332%\hline
2333%\end{tabular}
2334%\end{center}
2335%\end{table}
2336%
2337%
2338%From the comparative results in Figure~\ref{Fig:lifetime-compare1} and Table~\ref{tab-compare2}, the robust designs in all the different system lifetime requirements ($\mathcal{T}_R=18,19$ and $20$) provide more reliable performance profiles than the probabilistic designs, in protecting the reliability level $R_0$. Specifically, in each set of out-of-sample tests, compared with the probabilistic design, the robust design pulls the sample system lifetimes (Figure~\ref{Fig:lifetime-compare1}:(a)-(c)) and sample mean of the system lifetimes (Figure~\ref{Fig:lifetime-compare1}:(d)-(f)) further away from the lifetime requirement $\mathcal{T}_R$ the to the right side, forming a wider safety gap which is able to absorb more lifetime uncertainty. Furthermore, from Table~\ref{tab-compare2}, the sample mean of reliability levels achieved by robust design is $0.873, 0.884$ and $0.909$ under different $\mathcal{T}_R$ levels which is very close or exceeds the designed level $R_0=0.9$ of reliability requirement. In contrast, the probabilistic design can only achieve $0.617, 0.564$ and $0.554$ in perspective $\mathcal{T}_R$ levels, which fall to meet the designed reliability level. Also, reliability levels achieved by robust design have a smaller variance than that of probabilistic design.
2339%On the other hand, although it is noted from the specifications of the `Design' column in Table~\ref{tab-compare2} that the higher performance achieved by the robust design is at the cost of using more components, our distributional robust design is not {\em ad-hoc}, but is calibrated by the distributional information of component lifetimes as given in the ambiguity set. In other words, it provides a more informed decision of redundancy allocation that can ensure the designed system reliability level given the component lifetime distributions being consistent to the characteristics of the ambiguity set.
2340%
2341%
2342%
2343%The implications of the comparison is that when the component lifetimes are highly uncertain, it could be risky to implement the probabilistic design (from the redundancy model that assumes known component reliability levels), since the actual reliability level achieved could be far below the designed level. In this situation, the robust design, albeit with more conservative redundancy allocation, is able to protect (or at least be very close to) the designed reliability level. Hence, it effectively reduces the risk of the system being `unreliable'.
2344%%\begin{eqnarray*}
2345%%\begin{array}{rcll}
2346%%& \min\limits_{\y} & \sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[y_{ij}\right]c_{ij} \\[0.3 cm]
2347%%& {\rm s.t.} & \displaystyle \prod_{i \in \mathbf{N}}\left[1-\prod_{j \in \mathbf{M}_i}(r_{ij})^{y_{ij}}\right]\ge R_0 & \\[0.3 cm]
2348%%&& L_{ij}\le y_{ij}\le U_{ij}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i\\
2349%%&& y_{ij} \in \mathbb{Z}_+, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i,
2350%%\end{array}
2351%%\end{eqnarray*}
2352%%where
2353%%$$
2354%%r_{ij}=\P\Big[\tilde{z}_{ij}\le \mathcal{T}_R \Big],
2355%%$$
2356%%which can be estimated from the data. By transforming the integer variable $y_{ij}$ with binaries $x^{k}_{ij}$:
2357%%$$
2358%%y_{ij}=L_{ij}+\sum_{k=0}^{U_{ij}-L_{ij}}kx^{k}_{ij},~\mbox{with}~\sum_{k=0}^{U_{ij}-L_{ij}}x^{k}_{ij}=1
2359%%$$
2360%%The above model can be readily linearized as the following MIP with binaries:
2361%%\begin{eqnarray}
2362%%\begin{array}{rcll}
2363%%& \min\limits_{\x} & \displaystyle \sum\limits_{i\in \mathbf{N}} \sum\limits_{j \in \mathbf{M}_i}\left[L_{ij}+\sum_{k=0}^{U_{ij}-L_{ij}}kx^{k}_{ij}\right]c_{ij} \\[0.3 cm]
2364%%& {\rm s.t.} & \displaystyle \sum_{i \in \mathbf{N}} \sum_{k=0}^{U_{ij}-L_{ij}}x^{k}_{ij}\ln\left[1-\prod_{j \in \mathbf{M}_i} (r_{ij})^{L_{ij}+k} \right]\ge \ln R_0 & \\[0.3 cm]
2365%%&& \displaystyle \sum_{k=0}^{U_{ij}-L_{ij}}x^{k}_{ij}=1, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i\\
2366%%&& x^{k}_{ij} \in \{0,1\}, & \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k \in \mathbf{K}_{ij}
2367%%\end{array}
2368%%\end{eqnarray}
2369%
2370%
2371%
2372%
2373%
2374%\subsection{A case of air brake system in high speed train}\label{sec:rlcs}
2375%A high speed train typically consists of several electric multiple units (EMUs) which are self-propelled carriages using electricity as the motive power. The air brake system is one critical subsystem of the EMU \cite{Hasegawa1999}. As shown in Figure \ref{figure-cs}, it mainly consists of one air compressor which generates compressed air, one check valve which ensures single direction of the airflow, one main air reservoir which stores the compressed air for braking operation, one relay valve which controls the air pressure according to the received signal and one boosting cylinder which transfers the air pressure to the amplified oil pressure to active the basic braking mechanisms to slow down or stop the unit \cite{Hasegawa1999,Cheng2009}.
2376%
2377%\begin{figure}[h]
2378%\centering
2379%\includegraphics[scale=0.5]{figure-case-study.pdf}
2380%\caption{\small Diagram of the air brake system of a EMU}
2381%\label{figure-cs}
2382%\end{figure}
2383%
2384%The high speed train is highly reliability-demanding, since one small accident would result to large number of passenger life losses given a high operational speed. To enhance the reliability of the air brake system, different types of redundancies, e.g. cold standby or hot standby, need to be introduced. For the two valves cold standbies are used, whereas for the rest components hot standbies are needed. The parameters of the various components are presented in Table \ref{tab-cs}, which are modified based the real data from a railway company in Europe, due to confidential agreement. Due to the space limit, the maximal number of components connected in parallel can not excess 4 and there must be at least one component in each subsystem. This system is required to operate without failure for 15-20 years at probability of 0.95-0.99.
2385%
2386%\begin{table}[t*]\scriptsize%\footnotesize%\small%
2387%\caption{\label{tab-cs} \footnotesize Lifetime parameters of the available components for air brake system.}
2388%\begin{center}
2389%\begin{tabular}{|c| c | c || c c c c |}\hline
2390% Subsystem & Redundancy Strategy & Component Type & $\underline{z}_{ij}$ (yrs) & $\overline{z}_{ij}$ (yrs) &$\mu_{ij}$ (yrs) & $c_{ij}$ (k\$) \\
2391% \hline
2392% &\multirow{5}{*}{Active redundant} & 1 & 10 & 23 &15 & 10.0 \\
2393% & &2 & 12 & 21 &16& 15.0 \\
2394% Air compressor & &3 & 16 & 24 &20& 17.0 \\
2395% & &4 & 17 & 25 &22& 25.0 \\
2396% & &5 & 20 & 26 &23& 32.0 \\
2397% \hline
2398% &\multirow{5}{*}{Cold Standby} &1 & 4 & 6 &10& 1.0 \\
2399% & &2 & 6 & 10.5 &8& 1.5 \\
2400% Check valve & &3 & 7.5 & 11 &9& 2.0 \\
2401% & &4 & 8.5 & 12 &10& 2.5 \\
2402% & &5 & 15 & 17.5 &16& 3.0 \\
2403% \hline
2404% &\multirow{6}{*}{Cold Standby} &1 & 4.5 & 8 &6& 1.5 \\
2405% & &2 & 6 & 9 &7& 2.0 \\
2406% Control valve & &3 & 7.5 & 10 &9& 2.5 \\
2407% & &4 & 8 & 11 &10& 3.0 \\
2408% & &5 & 10 & 12 &10.5& 3.5 \\
2409% & &6 & 10.5 & 12.5 &12& 4.0 \\
2410%\hline
2411% &\multirow{5}{*}{Active redundant} &1 & 15 & 21 &18& 15.0 \\
2412% & &2 & 17 & 22 &20& 20.5 \\
2413% Main air reservoir && 3 & 18 & 23 &21& 23.5 \\
2414% & &4 & 19 & 26 &23& 25.5 \\
2415% & &5 & 20 & 27 &25& 30.0 \\
2416% \hline
2417% &\multirow{5}{*}{Active redundant} &1 & 14 & 20 &16& 20.0 \\
2418% & &2 & 16 & 21 &18& 22.5 \\
2419% Boosting cylinder & &3 & 18 & 24 &22& 25.5 \\
2420% & &4 & 20 & 28 &24& 30.5 \\
2421% & &5 & 25 & 31 &28& 34.0 \\
2422%\hline
2423%\end{tabular}
2424%\end{center}
2425%\end{table}
2426%
2427%Implementing the proposed robust redundancy allocation model, we can obtain the design table (Table~\ref{d-table}) which provides with the system design information (and also the cost information) with different combinations of $(\mathcal{T}_R, R_0)$. The designer can then choose the most suitable system designs with these information. From Table~\ref{d-table}, we see that as the system lifetime requirement $\mathcal{T}_R$ or system reliability level $R_0$ increases, the design allocates more components (e.g. the number of components allocated to `Air compressor' is increased from 1 to 2 as $\mathcal{T}_R$ increases from 19 yrs to 20 yrs, under $R_0=0.95$) and/or shifts the components to more reliable ones (with longer expected lifetimes, e.g. the 1 component allocated to `Main air reservoir' is shifted from Type 2 to Type 5, as $\mathcal{T}_R$ increases from 16 yrs to 19 yrs, under $R_0=0.99$).
2428%
2429%\begin{sidewaystable}[!htbp]\scriptsize%\footnotesize%\small%
2430%\caption{\label{tab-cs} \footnotesize Design table for the air brake system: The number of components of each allocated in each subsystem for $R_0=0.95, 0.97$ and $0.99$, and $\mathcal{T}_R=15-20$ yrs, where no feasible design can be found for $R_0=0.99, \mathcal{T}_R=20$.}\label{d-table}
2431%\begin{center}
2432%\begin{tabular}{|c| c|| cccccc || cccccc || cccccc |}\hline
2433%\multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{6}{c||}{$R_0=0.95$,~$\mathcal{T}_R$ (yrs) }& \multicolumn{6}{c||}{$R_0=0.97$,~$\mathcal{T}_R$ (yrs)}& \multicolumn{6}{c|}{$R_0=0.99$,~$\mathcal{T}_R$ (yrs)} \\
2434% \cline{3-20}
2435% &&15 & 16 & $17$ & $18$& $19$& $20$&$15$ & $16$ & $17$ & $18$& $19$& $20$&$15$ & $16$ & $17$ & $18$& $19$ & 20 \\
2436% \hline
2437% & 1 & 0 & 0 &0 & 0 & 0& 0 & 0 & 0 &0 & 0& 0 & 0 & 0 & 0 &0 & 0& 0 &\multirow{5}{*}{--} \\
2438% Air & 2 & 0 & 0 &0 & 0 & 0& 0 & 0 & 0 &0 & 0&0 & 0 & 0 & 0 &0& 0 &0 &\\
2439% compressor & 3 & 1 & 1 &1 & 1 & 0& 0 & 1 & 1 &0 & 0&0 & 0 & 1 & 0 &0& 0 &0 & \\
2440% & 4 & 0 & 0 &0 & 0 & 1& 2 & 0 & 0 &1 & 1&0 & 0 & 0 & 1 &0& 0 &0 &\\
2441% & 5 & 0 & 0 &0 & 0 & 0& 0 & 0 & 0 &0 & 0&1 & 2 & 0 & 0 &1& 1 &1 &\\
2442% \hline
2443% & 1 & 1 & 1 &1& 1 & 0 & 0 & 1 & 1 &1& 1 & 0 & 0 & 1 & 1 &1& 1 & 0 &\multirow{5}{*}{--} \\
2444% Check & 2 & 0 & 2 &0& 0 & 1 & 1 & 0 & 0 &0& 0 & 1& 1 & 0 & 0 &0& 0 & 1 &\\
2445% valve & 3 & 0 & 0 &0& 0 & 0 & 0 & 0 & 0 &0& 0 & 0 & 1 & 0 & 0 &0& 0 & 0 & \\
2446% & 4 & 0 & 0 &0& 0 & 0 & 0 & 0 & 0 &0& 0 & 0& 0 & 0 & 0 &0& 0 & 0 &\\
2447% & 5 & 1 & 0 &1& 1 & 1 & 1 & 1 & 1 &1& 1 & 1 & 1 & 1 & 1 &1& 1 & 1 &\\
2448% \hline
2449% & 1 & 0 & 0 &1& 1 & 1& 1 & 0 & 3 &0& 0 & 1& 1 & 0 & 1 &0& 1 & 1 &\multirow{5}{*}{--}\\
2450% Control & 2 & 0 & 0 &1& 0 & 0 & 1 & 0 & 0 &0& 2 & 0& 1 & 0 & 2 &0& 1 & 0 &\\
2451% valve & 3 & 1 & 1 &1& 2 & 2& 0 & 1 & 1 &1& 1 & 2& 0 & 1 & 0 &1& 0 & 2 &\\
2452% & 4 & 1 & 0 &0& 0 & 0& 0 & 1 & 0 &0& 0 & 0& 0 & 1 & 0 &0& 1 & 0 &\\
2453% & 5 & 0 & 0 &0& 0 & 0& 1 & 0 & 0 &1& 0 & 0& 1 & 0 & 0 &1& 0 & 0 &\\
2454% & 6 & 0 & 0 &0& 0 & 0& 0 & 0 & 0 &0& 0 & 0& 0 & 0 & 0 &0& 0 & 0 &\\
2455%\hline
2456% & 1 & 1 & 0 &0& 0 & 0& 0 & 0 & 0 &0& 0 & 0& 0 & 0 & 0 &0& 0 & 0&\multirow{5}{*}{--}\\
2457% Main & 2 & 0 & 1 &0& 0 & 0& 0 & 1 & 1 &0& 0 & 0& 0 & 1 & 1 &0& 0 & 0&\\
2458% air reservoir & 3 & 0 & 0 &1& 0 & 0& 0 & 0 & 0 &1& 0 & 0& 0 & 0 & 0 &1& 0 & 0&\\
2459% & 4 & 0 & 0 &0& 1 & 0& 0 & 0 & 0 &0& 1 & 1& 0 & 0 & 0 &0& 1 & 0&\\
2460% & 5 & 0 & 0 &0& 0 & 1& 1 & 0 & 0 &0& 0 & 0& 2 & 0 & 0 &0& 0 & 1&\\
2461% \hline
2462% & 1 & 0 & 0 &0& 0 & 0& 0 & 0 & 0 &0& 0 & 0& 0 & 0 & 0 &0& 0 & 0&\multirow{5}{*}{--}\\
2463% Boosting & 2 & 1 & 0 &0& 0 & 0& 0 & 1 & 0 &0& 0 & 0& 0 & 1 & 0 &0& 0 & 0&\\
2464% cylinder & 3 & 0 & 1 &1& 0 & 0& 0 & 0 & 1 &1& 0 & 0& 0 & 0 & 1 &1& 0 & 0&\\
2465% & 4 & 0 & 0 &0& 1 & 1& 0 & 0 & 0 &0& 1 & 1& 0 & 0 & 0 &0& 1 & 1&\\
2466% & 5 & 0 & 0 &0& 0 & 0& 1 & 0 & 0 &0& 0 & 0& 1 & 0 & 0 &0& 0 & 0&\\
2467%\hline
2468%\multicolumn{2}{|c||}{Design cost (k\$)} & 64& 72.5 &76&\!\! 83.5 &\!\! 96.5&\!\! 125.5 & 69.5 & 72.5 &84& 91.5 & 99& 169.5 & 69.5& \!\! 80.5\!\! &91\!\!& 98.5\!\! & 103.5\!\!&--\\
2469%\hline
2470%\end{tabular}
2471%\end{center}
2472%\end{sidewaystable}
2473
2474\section{Computational Study}
In this section we present numerical experiments of our model, as well as a case study. The computational study consists of six parts: (i) visualizing the results of dimension reduction and clustering; (ii) testing how the design changes when parameters vary, including $K$ and other hyperparameters; (iii) choosing the best parameter $K$ by cross validation; (iv) experimenting on the value of side information; (v) comparing our design with a baseline probabilistic model; and (vi) a real-life case of maintenance in the high speed train industry. The distributions used in experiments (i)--(v) are generated from a real data set from (Wang~et~al.~2019).
All computational experiments were performed on a PC with an Intel(R) Core(TM) i7-7500U CPU at 2.7 GHz, coded in Python. The MIP models were solved by the Gurobi solver, version 8.1.1.
2477
2478\subsection{Visualizing clusters}
In this section we present a visualization of dimension reduction and clustering. We first apply the t-SNE algorithm, a state-of-the-art algorithm for dimension reduction and visualization, \iffalse (refer to Maaten L, Hinton G. Visualizing data using t-SNE[J]. Journal of machine learning research, 2008, 9(Nov): 2579-2605.)\fi to reduce the lifetime data from 28 dimensions to 2 dimensions, and then perform K-means clustering. We present figures for different choices of the number of clusters, $K=2$ and $K=5$, respectively. As we can see in the figure, the original multi-dimensional data can be well clustered after dimension reduction. The clustering results, including results from other choices of $K$, are used in the following experiments.
2480
2481\begin{figure}[H]
2482\centering
2483%\includegraphics{TSNE_1.png}
2484\includegraphics[width=\columnwidth]{2D_tsne.jpg}
\caption{\footnotesize Visualization of dimension reduction by the t-SNE algorithm and clustering by the K-means algorithm, into 2 clusters on the left and 5 clusters on the right. Note that the two figures are produced by different data.}
2486\label{figure_TSNE_1}
2487\end{figure}
2488
2489\subsection{System design variation with different parameters}
2490
2491\begin{figure}[H]
2492\centering
2493\includegraphics[scale=0.8]{KVARIATION11.pdf}
\caption{\footnotesize The multi-type series-parallel system we experiment with in this section. It consists of 3 subsystems, each with 1 type of active parallel component and 2 types of cold-standby components. Each type of component can have up to 3 redundancies.}
2495\label{figure1}
2496\end{figure}
2497
In this subsection we experiment on adjusting the parameters of the model and observe how the system design $x$ changes accordingly. We consider the following setting: a series-parallel system with 3 subsystems ($|\mathbf{N}|=3$), with each subsystem containing 3 types of components ($|\mathbf{M}_i|= 3, \forall i \in \mathbf{N}$), among which 1 type is active-parallel and 2 types are cold-standby, and each type has up to 3 redundant components ($|\mathbf{T}_{ij}|=3, \forall i \in \mathbf{N}, j\in \mathbf{M}_i$). The lifetime requirement is $\mathcal{T}_S = 29$ and $\epsilon_{ij} = 0.05,\forall i \in [3], j \in [3]$. The parameters we adjust include $K \in \{1, 3, 5, 8, 10\}$, $R_0 \in \{0.95, 0.97, 0.99\}$, and $\mathcal{R}$, which controls $\underline{\hmu}$ and $\overline{\hmu}$:
2499$$
2500\underline{\mu}^{k}_{ij}={\nu}^{k}_{ij}-\mathcal{R},
2501\quad \overline{\mu}^{k}_{ij}={\nu}^{k}_{ij}+\mathcal{R} ,
2502$$
where $\mathcal{R}\in \{0.025, 0.05, 0.075, 0.1\}$. Therefore, by changing the value of $\mathcal{R}$, we can obtain different sets of parameters $\underline{\hmu}$ and $\overline{\hmu}$. Specifically, larger values of $\mathcal{R}$ correspond to larger gaps between $\underline{\hmu}$ and $\overline{\hmu}$.
2504
\begin{table}[!htbp]\scriptsize%\footnotesize%\small%
\caption{\footnotesize The design table for different $K$ under $\mathcal{T}_S = 29$.}\label{d-table}
2507\begin{center}
2508\begin{adjustbox}{angle=270}
2509\scalebox{1}{
2510\begin{tabular}{|c|c|c| c|| ccccc|| ccccc|| ccccc||}\hline
2511 \multirow{3}{*}{$\epsilon$} & \multirow{3}{*}{$\mathcal{R}$} & \multirow{3}{*}{Subsystem} & \multirow{3}{*}{Type} & \multicolumn{5}{c||}{$R_0 = 0.95$} & \multicolumn{5}{c||}{$R_0 = 0.97$} & \multicolumn{5}{c||}{$R_0 = 0.99$}\\
2512 \cline{5-19}
2513 && & & \multicolumn{5}{c||}{$ K$} & \multicolumn{5}{c||}{$K$} & \multicolumn{5}{c||}{$ K$}\\
2514 &&&&1 & 3 & 5 & 8 & 10 &1 & 3 & 5 & 8 & 10 & 1 & 3 & 5 & 8 & 10 \\
2515 \hline
2516 &&& AP & 1 & 1 & 1 &1 & 1 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 &1 & 1\\
2517 &&1 & CS-I & 1 & 1 & 1 &1 & 1 & 2 & 2 & 2 & 2& 1 & 2 & 2 & 2 &2 & 2\\
2518 &&& CS-II & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0\\
2519 \cline{3-19}
2520 &&& AP & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 & 1\\
2521 &0.025&2 & CS-I & 0& 0 & 0 & 0 & 1 & 0 & 0 & 0 &0& 0& 0 & 0 & 0 &0 & 0\\
2522 &&& CS-II & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
2523 \cline{3-19}
2524 &&& AP &0 & 0 & 1& 1 & 1 & 0 &1 & 1 &1 & 1 & 1 &0 &0 &0 & 0\\
2525 &&3 & CS-I & 1 & 1& 0& 0 & 0 & 1 & 0 & 0& 0 & 1 & 1 & 1 & 1 &1 & 1\\
2526 &&& CS-II & 3 & 3 & 3 &3 & 3& 3 & 3 & 3& 3 & 3& 3 & 3 & 3 &3 & 3\\
2527\cline{2-19}
2528\multicolumn{1}{|c|}{} & \multicolumn{3}{c||}{Design cost (k\$)} & 39.5& 39.5&\!\! 39 &39& 39 & 44.5&44&44&\!\!44 & 41 &55 & 54.5 &\!\! 54.5 &54.5& 54.5 \\
2529 \cline{2-19}
2530 &&& AP& 1 & 1 & 1 &1 & 1 & 0 & 0 & 0 &0 & 0 & 1& 1 & 1 & 1 &1\\
2531 &&1 & CS-I& 1 & 1 & 1 &1 & 1 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2 \\
2532 &&& CS-II & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0& 0\\
2533 \cline{3-19}
2534 &&& AP & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 \\
2535 &0.05&2 & CS-I &1 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0\\
2536 &&& CS-II & 2 & 2 & 2 &2 & 2 & 2 & 2 &2 &2 &2& 2 & 2 & 2 &2 &2\\
2537 \cline{3-19}
2538 &&& AP & 1 & 1 & 1 &1 & 0 & 1& 1 & 0& 0 & 0 & 1 &1 & 1 &1 & 1\\
2539 &&3 & CS-I & 3 & 3 & 3 &3 & 1 & 3 & 3 & 1& 1 & 1 & 3 & 3 & 3 & 3 & 3\\
2540 &&& CS-II & 1 & 1 & 1 &1 & 3 & 1 & 1& 3& 3 & 3 & 1 & 1 & 1 &1 & 1\\
2541\cline{2-19}
2542\multicolumn{1}{|c|}{0.05} & \multicolumn{3}{c||}{Design cost (k\$)} & 40&40 &\!\! 40 &40& 39.5 & 45&45&\!\! 44.5 & 44.5 & 44.5 & 55&55&\!\! 55 & 55 & 55\\
2543 \cline{2-19}
2544 &&& AP& 1& 1 & 1 & 1 &1 & 0 & 0 & 0 &0 & 0 & 1& 1 & 1 & 1 &1\\
 &&1 &CS-I & 1& 1 & 1 & 1 &1 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
2546 &&& CS-II & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0 \\
2547 \cline{3-19}
2548 &&& AP & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 \\
2549 &0.075&2 & CS-I & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0 \\
2550 &&& CS-II & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2 \\
2551 \cline{3-19}
2552 &&& AP & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1\\
2553 &&3 & CS-I & 3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 &3 & 3& 3 & 3 & 3 &3 & 3\\
2554 &&& CS-II & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1\\
2555\cline{2-19}
2556\multicolumn{1}{|c|}{} & \multicolumn{3}{c||}{Design cost (k\$)} & 40&40&\!\! 40 & 40 & 40 & 45&45&\!\! 45& 45&45 & 55&55 &\!\! 55 & 55& 55\\
2557
2558 \cline{2-19}
2559 &&& AP& 1& 1 & 1 & 1 &1 & 0 & 0 & 0 &0 & 0 & 1& 1 & 1 & 1 &1\\
2560
2561 &&1 & CS-I & 1& 1 & 1 & 1 &1 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
 &&&CS-II & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0\\
2563 \cline{3-19}
2564 &&& AP & 1& 1 & 1 & 1 &1 & 1 &1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 \\
2565 &0.1&2 & CS-I & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0& 0 & 0 & 0 &0 & 0 \\
2566 &&& CS-II & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2& 2 & 2 & 2 &2 & 2\\
2567 \cline{3-19}
2568 &&& AP &1 & 1& 1 & 1 & 1 & 1 & 1 & 1 & 1 &1 & 1& 1 & 1 & 1 &1 \\
2569 &&3 & CS-I & 3 & 3 & 3 &3 & 3 & 3 & 3 & 3 &3 & 3& 3 & 3 & 3 &3 & 3\\
2570 &&& CS-II& 1& 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1& 1 & 1 & 1 & 1 \\
2571\cline{2-19}
2572\multicolumn{1}{|c|}{} & \multicolumn{3}{c||}{Design cost (k\$)} & 40&\!\! 40 & 40&40 & 40 & 45&45&\!\! 45 &45& 45 & 55&55&\!\! 55 & 55 & 55 \\
2573\hline
2574\end{tabular}}
2575\end{adjustbox}
2576\end{center}
2577\end{table}
2578
2579%The resulting system design in the case of $\epsilon_{ij} = 0.05$ are shown in the table. We leave the rest of experiment results in the Electronic Companion.
2580
Note that when $K = 1$, the model reduces to the robust model without clustering of Wang~et~al.~(2019).
2582
The observations from the experimental results are the following:
(i) the cost increases when the variation range $[\underline{\hmu}, \overline{\hmu}]$ of expected lifetimes increases, or the dispersion parameter $\epsilon$ of the lifetimes of components enlarges. Such increased cost is due to the enlarged ambiguity set $\mathbb{F}_K$ resulting from the change of the above distributional parameters $[\underline{\hmu}, \overline{\hmu}]$ and $\epsilon$. (ii) Intuitively, the cost also increases if the required reliability level $R_0$ is increased.
(iii) The cost decreases as the number of clusters $K$ increases. This is due to the fact that, since the mean and dispersion information of each cluster are included in the ambiguity set, more clusters imply more information and therefore a smaller ambiguity set.
2586
We then perform out-of-sample tests on the designs obtained with $K = 1, 5, 10$. To test the robustness of the designs, we generate testing data with a smaller lifetime mean and a larger standard deviation. In particular, we let $\hmu_{test} = (1 - \Delta_{M})\hmu_{train}$ and $\hsigma_{test} = (1 + \Delta_{S})\hsigma_{train}$, where $\Delta_M$ is set to 8\% and $\Delta_S$ is set to 10\%, 20\% and 30\%, respectively.
2588
2589\begin{table}[htp]\footnotesize%\small%
\caption{\label{tab-compare2} \footnotesize The out-of-sample reliability of the designs generated by the $K = 1, 5, 10$ models under different $\Delta_{S}$. $R_0$ is set to $0.95$. In the design columns, the 3 columns correspond to the different subsystems. The three numbers in each column indicate the numbers of redundancies used that are active parallel, cold-standby type I, and cold-standby type II, respectively. }
2591\begin{center}
2592\begin{tabular}{|c||c|c|| c|c|c| c | c |c |}\hline
2593\multirow{2}{*}{$\mathcal{T}_S$} & \multirow{2}{*}{$(\Delta_{M}, \Delta_{S})$} & \multirow{2}{*}{Model} & \multicolumn{3}{c|}{Design} & \multirow{2}{*}{cost} & \multirow{2}{*}{Mean of out-of-sample reliability level} & \multirow{2}{*}{StD} \\
2594\cline{4-6}
2595&&& AP & CS-I & CS-II &&&\\
2596\hline
2597\multirow{9}{*}{29} & \multirow{3}{*}{(8\%, 10\%)} & K=1 Model & (0,2,0) & (1,3,0) & (0,1,3)& 45.5 &0.976 & 0.152 \\
2598 & & K=5 Model & (0,2,0) & (1,2,0) & (1,2,2)& 45.0 &0.959 & 0.198 \\
2599 & & K=10 Model & (1,1,0) & (1,3,0) & (1,1,3)& 42.0 &0.607 & 0.489 \\
2600 \cline{2-9}
2601 &\multirow{3}{*}{(8\%, 20\%)} & K=1 Model & (0,2,0) & (1,3,0) & (0,1,3)& 45.5 & 0.972 & 0.167 \\
2602 & & K=5 Model & (0,2,0) & (1,2,0) & (1,2,2)& 45.0&0.953 & 0.211 \\
2603 & & K=10 Model & (1,1,0) & (1,3,0) & (1,1,3)& 42.0 &0.620 & 0.485 \\
2604 \cline{2-9}
2605 &\multirow{3}{*}{(8\%, 30\%)} & K=1 Model & (0,2,0) & (1,3,0) & (0,1,3)& 45.5 & 0.964 & 0.186 \\
2606 & & K=5 Model & (0,2,0) & (1,2,0) & (1,2,2)& 45.0 &0.947 & 0.223 \\
2607 & & K=10 Model & (1,1,0) & (1,3,0) & (1,1,3)& 42.0 &0.614 & 0.487 \\
2608\hline
2609
2610\end{tabular}
2611\end{center}
2612\end{table}
2613
2614\begin{figure}[H]
2615\centering
2616\includegraphics[width=\columnwidth]{Out_of_sample_K.png}
\caption{\footnotesize Figures (a), (b) and (c) represent the out-of-sample reliability of the designs generated by the $K = 1, 5, 10$ models under different $\Delta_{S}$, respectively. The vertical beam represents $\mathcal{T}_S$. The fraction of the lifetime histogram on the right side of the beam represents the out-of-sample reliability level.}
2618\label{figureK}
2619\end{figure}
2620
From the results, we can observe that with a moderate number of clusters ($K = 5$), we can obtain designs with lower costs than the designs generated by the robust model with no clustering ($K = 1$), while retaining robustness even when there is a significant shrinkage in the mean lifetime and a much larger standard deviation. This shows that, by the incorporation of clustering, our framework can produce designs that are sufficiently robust and cheaper. However, if the number of clusters becomes too large ($K = 10$), the out-of-sample reliability drops significantly. The possible reason is that when $K$ is too high, the model has to split natural clusters into smaller ones, which means that it learns unnecessary information and overfits. Thus, it is crucial to choose the optimal $K$. We present an experiment on choosing $K$ by cross validation in the next subsection.
2622
2623\subsection{Choosing $K$ by cross validation}
In this subsection, we present a cross validation experiment by applying Algorithm 2. In particular, we choose $m = 10$ and perform a 10-fold cross validation. The number of constraint violations, as well as the cost of the designs, are plotted in the figure below on the left. \iffalse We also offer a combined metric for cross validation. First, the cost and the number of violations are both normalized to the range $[0, 1]$. Then, compute $(1-\lambda)cost(K) + \lambda{{\#}violation(K)}$, where $\lambda \in [0,1]$. By assigning different $\lambda$, we can adapt to scenarios with different cost-violation tradeoffs. In particular, a high $\lambda$ means that robustness of the design is valued more than the cost; a low $\lambda$ indicates the contrary. \fi The combined metric under different $\lambda$ is plotted in the figure below on the right. Observe that when $\lambda$ is low, large values of $K$ such as 7 and 9 are preferred; when $\lambda$ is high, a moderate $K$ such as 5 is better. Since a large $K$ generally corresponds to a lower cost, this result matches the intuition that people are willing to pay a higher cost when robustness is valued more.
2625 %least constraint violation occurs, so $K = 5$ is the ideal parameter to cluster this data set. $K = 5$ will be used in the following subsections.
2626\iffalse
2627\begin{figure}[H]
2628\begin{subfigure}{0.5\textwidth}
2629\includegraphics[scale=0.65]{cross_validation.png}
2630\caption{\footnotesize}
2631\label{figure4-1}
2632\end{subfigure}
2633\begin{subfigure}{0.5\textwidth}
2634\centering
2635\includegraphics[scale=0.37]{cross_validation_lambda.png}
2636\caption{\footnotesize }
2637\label{figure4-2}
2638\end{subfigure}
2639\caption{\footnotesize (a) The number of violations and costs with different $K$. (b) Costs penalized by $\lambda$ with different $K$, with each line associated with a different $\lambda$.}
2640\end{figure}
2641\fi
2642
2643\begin{figure}[H]
2644\centering
2645\includegraphics[width=\columnwidth]{cv.jpg}
2646\caption{\footnotesize (a) The number of violations and costs with different $K$. (b) Costs penalized by $\lambda$ with different $K$, with each line associated with a different $\lambda$.}
2647\label{figure4-2}
2648\end{figure}
2649
2650
2651
2652
2653\iffalse
2654\begin{table}[h*]\footnotesize%\small%
2655\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.85$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
2656\begin{center}
2657\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
2658 $L_S$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
2659 \hline
2660 \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.834 & 0.372 \\
2661 && C-DRO-Model & (1,1,3) & &0.997 & 0.053 \\
2662 \cline{2-7}
2663 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.724 & 0.447 \\
2664 && C-DRO-Model & (1,1,3) & &0.982 & 0.132 \\
2665 \cline{2-7}
2666 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.673 & 0.469 \\
2667 && C-DRO-Model & (1,1,3) & &0.971 & 0.168 \\
2668\hline
2669 \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.781 & 0.414 \\
2670 && C-DRO-Model & (1,1,4) & &0.999 & 0.028 \\
2671 \cline{2-7}
2672 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.690 & 0.462 \\
2673 && C-DRO-Model & (1,1,4) & &0.991 & 0.095 \\
2674 \cline{2-7}
2675 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.637 & 0.481 \\
2676 && C-DRO-Model & (1,1,4) & &0.989 & 0.105 \\
2677\hline
2678 \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.716 & 0.451 \\
2679 && C-DRO-Model & (1,1,7) & &1.000 & 0.000 \\
2680 \cline{2-7}
2681 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.640 & 0.480 \\
2682 && C-DRO-Model & (1,1,7) & &0.998 & 0.040 \\
2683 \cline{2-7}
2684 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.85$}& 0.602 & 0.490 \\
2685 && C-DRO-Model & (1,1,7) & &0.999 & 0.035 \\
2686\hline
2687\end{tabular}
2688\end{center}
2689\end{table}
2690
2691\begin{table}[h*]\footnotesize%\small%
2692\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.90$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
2693\begin{center}
2694\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
2695 $L_S$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
2696 \hline
2697 \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.618 & 0.486 \\
2698 && C-DRO-Model & (1,1,3) & &0.998 & 0.047 \\
2699 \cline{2-7}
2700 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.576 & 0.494 \\
2701 && C-DRO-Model & (1,1,3) & &0.989 & 0.103 \\
2702 \cline{2-7}
2703 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.560 & 0.496 \\
2704 && C-DRO-Model & (1,1,3) & &0.988 & 0.111 \\
2705\hline
2706 \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.528 & 0.499 \\
2707 && C-DRO-Model & (1,1,4) & &0.999 & 0.037 \\
2708 \cline{2-7}
2709 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.538 & 0.499 \\
2710 && C-DRO-Model & (1,1,4) & &0.996 & 0.063 \\
2711 \cline{2-7}
2712 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.530 & 0.499 \\
2713 && C-DRO-Model & (1,1,4) & &0.994 & 0.080 \\
2714\hline
2715 \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.458 & 0.498 \\
2716 && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
2717 \cline{2-7}
2718 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.481 & 0.500 \\
2719 && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
2720 \cline{2-7}
2721 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.496 & 0.500 \\
2722 && C-DRO-Model & (1,1,7) & &0.995 & 0.069 \\
2723\hline
2724\end{tabular}
2725\end{center}
2726\end{table}
2727
2728\begin{table}[h*]\footnotesize%\small%
2729\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.90$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
2730\begin{center}
2731\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
2732 $L_S$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
2733 \hline
2734 \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.143 & 0.350 \\
2735 && C-DRO-Model & (1,1,3) & &0.653 & 0.476 \\
2736 \cline{2-7}
2737 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.576 & 0.494 \\
2738 && C-DRO-Model & (1,1,3) & &0.989 & 0.103 \\
2739 \cline{2-7}
2740 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.560 & 0.496 \\
2741 && C-DRO-Model & (1,1,3) & &0.988 & 0.111 \\
2742\hline
2743 \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.528 & 0.499 \\
2744 && C-DRO-Model & (1,1,4) & &0.999 & 0.037 \\
2745 \cline{2-7}
2746 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.538 & 0.499 \\
2747 && C-DRO-Model & (1,1,4) & &0.996 & 0.063 \\
2748 \cline{2-7}
2749 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.530 & 0.499 \\
2750 && C-DRO-Model & (1,1,4) & &0.994 & 0.080 \\
2751\hline
2752 \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.458 & 0.498 \\
2753 && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
2754 \cline{2-7}
2755 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.481 & 0.500 \\
2756 && C-DRO-Model & (1,1,7) & &0.996 & 0.060 \\
2757 \cline{2-7}
2758 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.496 & 0.500 \\
2759 && C-DRO-Model & (1,1,7) & &0.995 & 0.069 \\
2760\hline
2761\end{tabular}
2762\end{center}
2763\end{table}
2764
2765\begin{table}[h*]\footnotesize%\small%
2766\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.90$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
2767\begin{center}
2768\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
2769 $L_S$ & Out of sample $\sigma$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
2770 \hline
2771 \multirow{6}{*}{7.625} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.348 & 0.476 \\
2772 && C-DRO-Model & (1,1,3) & &0.930 & 0.256 \\
2773 \cline{2-7}
2774 & \multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.427 & 0.495 \\
2775 && C-DRO-Model & (1,1,3) & &0.948 & 0.221 \\
2776 \cline{2-7}
2777 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.445 & 0.497 \\
2778 && C-DRO-Model & (1,1,3) & &0.957 & 0.203 \\
2779\hline
2780 \multirow{6}{*}{7.75} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.278 & 0.448 \\
2781 && C-DRO-Model & (1,1,4) & &0.933 & 0.250 \\
2782 \cline{2-7}
2783 &\multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.381 & 0.486 \\
2784 && C-DRO-Model & (1,1,4) & &0.974 & 0.159 \\
2785 \cline{2-7}
2786 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.409 & 0.492 \\
2787 && C-DRO-Model & (1,1,4) & &0.979 & 0.143 \\
2788\hline
2789 \multirow{6}{*}{7.875} & \multirow{2}{*}{2}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.214 & 0.410 \\
2790 && C-DRO-Model & (1,1,7) & &0.909 & 0.287 \\
2791 \cline{2-7}
2792 & \multirow{2}{*}{6}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.345 & 0.475 \\
2793 && C-DRO-Model & (1,1,7) & &0.973 & 0.163 \\
2794 \cline{2-7}
2795 &\multirow{2}{*}{10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.90$}& 0.383 & 0.486 \\
2796 && C-DRO-Model & (1,1,7) & &0.985 & 0.122 \\
2797\hline
2798\end{tabular}
2799\end{center}
2800\end{table}
2801\fi
2802
2803\iffalse
2804From the experiment result, we can observe that despite a smaller ambiguity set, our design ($x^{(2)}$) can achieve robustness level that is comparable to the design without clustering $x^{(1)}$, and are far better than the baseline probabilistic model ($x^{(3)}$).
2805\fi
2806
2807
2808\subsection{Value of side information}
In this subsection we experiment on clustering according to side information. The system we study is the same as the one in Section 5.2. We choose $K = 5$, corresponding to $\lambda = 0.8$ in the cross validation section. When generating samples from the distributions, we also obtain the side information of which of the 5 distributions each sample is drawn from. We then cluster the data set with $K = 5$, based solely on the side information, and compute the parameters $(\hnu, [\underline{\hmu}, \overline{\hmu}], \bm{\hsigma}, \p)$ of the model from it. We obtain a design ($x^{(2)}$) from this model, and compare it with the design ($x^{(1)}$) obtained from the model in which $K$ is also 5, but the data are clustered directly based on lifetime information instead of the side information.
2810
\begin{table}[H]\scriptsize%\footnotesize%\small%
\caption{ \footnotesize The design table for the $K = 5$ model with and without side information (S.I.)}\label{d-table}
2813\begin{center}
2814\begin{tabular}{|c|c| c|| ccccc || ccccc || ccccc |}\hline
2815\multirow{3}{*}{S.I} & \multirow{3}{*}{Subsystem} & \multirow{3}{*}{Type} & \multicolumn{5}{c||}{$R_0=0.95$ }& \multicolumn{5}{c||}{$R_0=0.97$}& \multicolumn{5}{c|}{$R_0=0.99$} \\
2816 \cline{4-18}
2817 &&&\multicolumn{5}{c||}{$\mathcal{T}_S $ (yrs) }& \multicolumn{5}{c||}{$\mathcal{T}_S $ (yrs)}& \multicolumn{5}{c||}{$\mathcal{T}_S $ (yrs)}\\
2818
2819 &&&28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 \\
2820 \cline{1-18}
2821 && AP & 0 & 0 &0 & 0 & 0 & 0 & 0 &1 & 1 & 1 & 1 & 1 & 0 & 1 & 1 \\ &
2822 1 & CS-l & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 3 & 3 & 3 & 3 & 3 \\
2823 &&CS-ll & 3 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
2824 \cline{2-18}
2825 && AP & 2 & 2 &2 & 1 & 3 & 2& 1 &2 & 2 & 2 & 1 & 1 & 1 & 1 & 1 \\ With &
2826 2 & CS-l & 3 & 3 &0 & 3 & 0 &0 & 2 &3 & 0 & 0 & 3 & 3 & 3& 3& 3 \\ S.I &
2827 & CS-ll & 0 & 0 &2 & 0 & 3 & 3 & 3 &0 & 3 & 3 & 3 & 3& 3 & 3 & 3 \\
2828 \cline{2-18}
2829 && AP & 1 & 1 &1 & 1 & 1 & 1 & 0 &1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\ &
2830 3 & CS-l & 0 & 0 &1 & 1 & 1 & 2 & 3 &1 & 1 & 1 & 3 & 3 & 3 & 3 & 3 \\
2831 && CS-ll & 3 & 3 &3 & 3 & 3 & 3 & 3 &3 & 3 & 3 & 3 & 3 & 3 & 3 & 3 \\
2832\hline
2833\multicolumn{3}{|c||}{Design cost (k\$)} & 45.5& 45.5 &47&\!\! 46.5 &\!\! 50 & 51 & 54.5 &57.5& 59 & 59 & 81.5 & \!\! 81.5\!\! &81.5\!\!& 81.5\!\! &81.5 \\
2834\hline
2835\multicolumn{3}{|c||}{Cost saved (k\$)} & 1 & 1 &1.5&\!\! 4 &\!\! 4.5 & 3.5 & 4.5 &2& 0.5 & 2 & 0 & \!\! 0\!\! &25\!\!& 15\!\! &15 \\
2836\cline{1-18}
2837 \cline{2-18}
2838 && AP & 0 & 0 &0 & 0 & 0 & 0 & 1&1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\ &
2839 1 & CS-l & 2 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 3 & 3 & 3 & 3 & 3 \\
2840 && CS-ll & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 1 & 1 & 1 \\
2841 \cline{2-18}
2842 && AP & 1 & 1 &3 & 3 & 1 & 1 & 2 &2 & 2 & 2 & 1 & 1 & 1 & 1 & 1 \\ Without &
2843 2 & CS-l & 3 & 3 &3 & 3 & 3 & 3 & 0 &3 & 3 & 0 & 3 & 3& 3 & 3 & 3 \\ S.I &
2844 & CS-ll & 0 & 0 &0 & 0 & 3 & 3 & 3 &0 & 0 & 3 & 3 & 3 & 3 & 3 & 3 \\
2845 \cline{2-18}
2846 && AP & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1& 1 & 1 & 1 & 1 \\ &
2847 3 & CS-l & 1 & 1 &1 & 1 & 2 & 2 & 1 &2 & 2 & 2 & 3& 3 & 2 & 2 & 2 \\
2848 && CS-ll& 3 & 3 &3 & 3 &3 & 3 & 3 &3 & 3 & 3 & 3& 3 & 3 & 3 & 3 \\
2849\hline
2850\multicolumn{3}{|c||}{Design cost (k\$)} & 46.5& 46.5 &48.5&\!\!50.5 &\!\! 54.5 &54.5 & 59 & 59.5 &59.5& 61 & 81.5 & 81.5\!\! & \!\! 96.5\!\! &96.5\!\!& 96.5\\
2851\hline
2852\end{tabular}
2853\end{center}
2854\end{table}
2855
2856
2857
2858\begin{table}[htp]\footnotesize%\small%
\caption{\label{tab-compare3} \footnotesize The out-of-sample results of designs obtained at $\mathcal{T}_S = 29$. $R_0$ is set to $0.95$. In the design columns, the 3 columns correspond to the different subsystems. The three numbers in each column indicate the numbers of redundancies used that are active parallel, cold-standby type I, and cold-standby type II, respectively. }
2860\begin{center}
2861\begin{tabular}{|c|c|c||c|c|c|| c|c|c|}\hline
2862 \multirow{2}{*}{($\Delta_{M}, \Delta_{S}$)} &\multirow{2}{*}{Model} & Reliability & \multirow{2}{*}{($\Delta_{M}, \Delta_{S}$)} &\multirow{2}{*}{Model} & Reliability& \multirow{2}{*}{($\Delta_{M}, \Delta_{S}$)} &\multirow{2}{*}{Model} & Reliability \\
2863&& level &&& level &&& level \\
2864\hline
2865
2866 \multirow{2}{*}{(5\%, 10\%)}& With S.I &0.9999& \multirow{2}{*}{(8\%, 10\%)}& With S.I& 0.994& \multirow{2}{*}{(10\%, 10\%)} & With S.I & 0.9626 \\
2867 & Without S.I &1.0& & Without S.I & 0.9989&& Without S.I &0.9877 \\
2868 \hline
2869 \multirow{2}{*}{(5\%, 15\%)} & With S.I& 0.9999 &\multirow{2}{*}{(8\%, 15\%)} & With S.I & 0.9939&\multirow{2}{*}{(10\%, 15\%)} & With S.I & 0.9557 \\
2870 & Without S.I & 1.0& & Without S.I & 0.9985 & & Without S.I & 0.9883\\
2871 \hline
2872 \multirow{2}{*}{(5\%, 20\%)}& With S.I & 1.0 &\multirow{2}{*}{(8\%, 20\%)} & With S.I & 0.994 &\multirow{2}{*}{(10\%, 20\%)} & With S.I & 0.9588 \\
2873 & Without S.I &1.0 & & Without S.I & 0.9986& & Without S.I & 0.9853\\
2874 \hline
2875 \multirow{2}{*}{(5\%, 30\%)} & With S.I& 0.9996 &\multirow{2}{*}{(8\%, 30\%)} & With S.I & 0.9919 &\multirow{2}{*}{(10\%, 30\%)} & With S.I & 0.9519 \\
2876 & Without S.I &1.0 & & Without S.I &0.9972 & & Without S.I & 0.9814 \\
2877
2878
2879\hline
2880\end{tabular}
2881
2882\end{center}
2883\end{table}
2884
2885
2886
2887
2888
2889
2890\iffalse
2891\begin{tabular}{|c|c||c|c|| c|c||c| c||}\hline
2892\multirow{2}{*}{$L_S$} & \multirow{2}{*}{($\Delta_{M}, \Delta_{S}$)} &\multirow{2}{*}{Model} & \multicolumn{3}{c|}{Design} & \multirow{2}{*}{cost} & \multirow{2}{*}{Mean of out-of-sample reliability level} & \multirow{2}{*}{StD} \\
2893\cline{4-6}
2894&&& AP & CS-I & CS-II &&&\\
2895\hline
2896\multirow{25}{*}{29} & \multirow{2}{*}{(5\%, 10\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 &0.9999 & 0.01 \\
2897 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &1.0 & 0.0 \\
2898 \cline{2-9}
2899 &\multirow{2}{*}{(5\%, 15\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.9999 & 0.01 \\
2900 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &1.0 & 0.0 \\
2901 \cline{2-9}
2902 &\multirow{2}{*}{(5\%, 20\%)} & With S.I &(0,2,0) & (2,3,0) & (0,2,3)& 49 & 1.0 & 0.0 \\
2903 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &1.0 & 0.0 \\
2904 \cline{2-9}
2905 &\multirow{2}{*}{(5\%, 30\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.9996 & 0.02 \\
2906 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &1.0 & 0.0 \\
2907 \clineB{2-9}{2}
2908 & \multirow{2}{*}{(8\%, 10\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 &0.994 & 0.0772 \\
2909 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &0.9997 & 0.0173 \\
2910 \cline{2-9}
2911 &\multirow{2}{*}{(8\%, 15\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.9939 & 0.0779 \\
2912 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &0.9998 & 0.0141 \\
2913 \cline{2-9}
2914 &\multirow{2}{*}{(8\%, 20\%)} & With S.I &(0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.994 & 0.0772 \\
2915 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &0.9998 & 0.0264 \\
2916 \cline{2-9}
2917 &\multirow{2}{*}{(8\%, 30\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.9919 & 0.0869 \\
2918 & & Without S.I &(0,2,0) & (2,3,0) & (0,3,3)& 51 &0.9988 & 0.0346 \\
2919 \cline{2-9}
2920 \clineB{2-9}{2}
2921 & \multirow{2}{*}{(10\%, 10\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 &0.9633 & 0.1880 \\
2922 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &0.9953 & 0.0683 \\
2923 \cline{2-9}
2924 &\multirow{2}{*}{(10\%, 15\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.9563 & 0.2044 \\
2925 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &0.9963& 0.0607 \\
2926 \cline{2-9}
2927 &\multirow{2}{*}{(10\%, 20\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.9589 & 0.1985 \\
2928 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &0.9942 & 0.0759 \\
2929 \cline{2-9}
2930 &\multirow{2}{*}{(10\%, 30\%)} & With S.I & (0,2,0) & (2,3,0) & (0,2,3)& 49 & 0.9523 & 0.2131 \\
2931 & & Without S.I & (0,2,0) & (2,3,0) & (0,3,3)& 51 &0.991 & 0.0944 \\
2932\hline
2933\end{tabular}
2934
2935\end{center}
2936\end{table}
2937\fi
The results in the design table above show that when side information is incorporated, we can achieve a design with a much lower cost. We then choose $\mathcal{T}_S = 29$ and perform out-of-sample tests in a similar way to the previous experiments. We can observe that even with this significant cost saving, the design obtained by clustering on side information still performs well enough under significant $\Delta_M$ and $\Delta_S$.
2939\begin{figure}[H]
2940\centering
2941\includegraphics[scale=0.2]{Out_of_sample_side.png}
2942\caption{\footnotesize The out-of-sample test result of designs obtained at $\mathcal{T}_S = 29$, with and without S.I. The vertical beam represents $\mathcal{T}_S $. The fraction of the lifetime histogram on the right side of beam represents the out-of-sample reliability level.}
2943\label{figure4}
2944\end{figure}
2945
2946
2947
2948
2949\iffalse
2950\begin{table}[H]\footnotesize%\small%
2951\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.95$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
2952\begin{center}
2953\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
2954 $L_S$ & $\Delta_{m}, \Delta_{s}$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
2955 \hline
2956 \multirow{12}{*}{7.5} & \multirow{2}{*}{1, 5}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.952 & 0.214 \\
2957 && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
2958 \cline{2-7}
2959 &\multirow{2}{*}{1, 10}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.886 & 0.318 \\
2960 && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
2961 \cline{2-7}
2962 &\multirow{2}{*}{1, 20}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.800 & 0.400 \\
2963 && C-DRO-Model & (1,2,8) & &1.000 & 0.000 \\
2964\hline
2965\end{tabular}
2966\end{center}
2967\end{table}
2968
2969\begin{table}[!htbp]\label{d-table}\scriptsize%\footnotesize%\small%
2970\caption{ \footnotesize The design table for $K = 10$ model with side information}
2971\begin{center}
2972\begin{tabular}{|c| c|| ccccc || ccccc || ccccc |}\hline
2973\multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{5}{c||}{$R_0=0.95$,~$L_S$ (yrs) }& \multicolumn{5}{c||}{$R_0=0.97$,~$L_S$ (yrs)}& \multicolumn{5}{c|}{$R_0=0.99$,~$L_S$ (yrs)} \\
2974 \cline{3-17}
2975 &&28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 \\
2976 \hline
2977 & 1 & 1 & 1 &1 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 & 1 \\
2978 1 & 2 & 1 & 1 &1 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 & 2 & 2 & 2 \\
2979 & 3 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
2980 \hline
2981 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
2982 2 & 2 & 1 & 0 &0 & 0 & 0 & 1 & 0 &0 & 0 & 3 & 1 & 0 & 0 & 0 & 3 \\
2983 & 3 & 1 & 2 &2 & 2 & 2 & 1 & 2 &2 & 2 & 0 & 1 & 2 & 2 & 2 & 0 \\
2984 \hline
2985 & 1 & 1 & 0 &1 & 1 & 0 & 1 & 1 &0 & 1 & 1 & 0 & 0 & 0 & 1 & 1 \\
2986 3 & 2 & 0 & 1 &3 & 0 & 1 & 0 & 0 &1 & 3 & 2 & 1 & 1 & 1 & 3 & 2 \\
2987 & 3 & 3 & 3 &1 & 3 & 3 & 3 & 3 &3 & 1 & 2 & 3 & 3 & 3 & 1 & 2 \\
2988\hline
2989\multicolumn{2}{|c||}{Design cost (k\$)} & 38.5& 39.5 &40&\!\! 44 &\!\! 44.5 & 43.5 & 44 &44.5& 45 & 46 & 54 & \!\! 54.5\!\! &54.5\!\!& 55\!\! &56\\
2990\hline
2991\end{tabular}
2992\end{center}
2993\end{table}
2994
2995
2996\begin{table}[!htbp]\label{d-table}\scriptsize%\footnotesize%\small%
2997\caption{ \footnotesize The design table for $K = 10$ model without side information}
2998\begin{center}
2999\begin{tabular}{|c| c|| ccccc || ccccc || ccccc |}\hline
3000\multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{5}{c||}{$R_0=0.95$,~$L_S$ (yrs) }& \multicolumn{5}{c||}{$R_0=0.97$,~$L_S$ (yrs)}& \multicolumn{5}{c|}{$R_0=0.99$,~$L_S$ (yrs)} \\
3001 \cline{4-18}
3002 &&28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 &28 & 28.5 & 29 & 29.5 & 30 \\
3003 \hline
3004 & 1 & 1 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 1 & 1 & 1 & 1 & 1 \\
3005 1 & 2 & 1 & 2 &2 & 2 & 2 & 2 & 2 &2 & 2 & 2 & 2 & 2 & 2 & 2 & 2 \\
3006 & 3 & 0 & 0 &0 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
3007 \hline
3008 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 &1 & 1 & 1 & 1 & 1 & 1 & 1 & 1 \\
3009 2 & 2 & 1 & 2 &0 & 0 & 3 & 1 & 0 &0 & 2 & 2 & 1 & 0 & 0 & 0 & 3 \\
3010 & 3 & 1 & 0 &2 & 2 & 0 & 1 & 2 &2 & 1 & 1 & 1 & 2 & 2 & 2 & 0 \\
3011 \hline
3012 & 1 & 1 & 1 &1 & 1 & 1 & 0 & 1 &1 & 1 & 0 & 0 & 1 & 1 & 1 & 1 \\
3013 3 & 2 & 3 & 3 &0 & 0 & 3 & 1 & 3 &3 & 3 & 2 & 1 & 3 & 3 & 3 & 2 \\
3014 & 3 & 1 & 1 &3 & 3 & 1 & 3 & 1 &1 & 1 & 3 & 3 & 1 & 1 & 1 & 2 \\
3015\hline
3016\multicolumn{2}{|c||}{Design cost (k\$)} & 39.5& 44 &44&\!\! 44 &\!\! 45.5 & 44 & 45 &45& 46 & 47.5 & 54 & \!\! 55\!\! &55\!\!& 55\!\! &56\\
3017\hline
3018\end{tabular}
3019\end{center}
3020\end{table}
3021\fi
3022
3023\iffalse
3024Data creating process finised
3025Academic license - for non-commercial use only
3026cost is 38.5003857478587 , L_S is 26, R_0 is 0.95
3027
3028(((1.0, -0.0, -0.0), (1.0, -0.0, -0.0), (7.563683503866358e-06, 7.563683503866358e-06, 7.563683503866358e-06)), ((-0.0, -0.0, 1.0), (-0.0, 1.0, -0.0), (1.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, -0.0, 1.0), (1.0, 1.0, -0.0)))
3029cost is 43.0 , L_S is 26, R_0 is 0.97
3030
3031(((-0.0, -0.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, 1.0, -0.0), (1.0, -0.0, 1.0)))
3032cost is 53.0 , L_S is 26, R_0 is 0.99
3033
3034(((-0.0, 1.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, 1.0, -0.0), (-0.0, 1.0, 1.0)))
3035cost is 44.0 , L_S is 27, R_0 is 0.95
3036
3037(((-0.0, -0.0, -0.0), (1.0, -0.0, 1.0), (0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (1.0, -0.0, -0.0), (1.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, -0.0, 1.0), (1.0, 1.0, 1.0)))
3038cost is 44.0 , L_S is 27, R_0 is 0.97
3039
3040(((-0.0, -0.0, -0.0), (1.0, 1.0, -0.0), (0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, 1.0, -0.0), (0.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, 1.0, -0.0), (1.0, 1.0, 1.0)))
3041cost is 54.0 , L_S is 27, R_0 is 0.99
3042
3043(((-0.0, 1.0, -0.0), (-0.0, 1.0, 1.0), (0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (1.0, -0.0, -0.0), (-0.0, -0.0, 1.0)), ((-0.0, -0.0, -0.0), (-0.0, 1.0, -0.0), (1.0, 1.0, 1.0)))
3044cost is 44.0 , L_S is 28, R_0 is 0.95
3045
3046(((-0.0, -0.0, -0.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, -0.0, 1.0), (1.0, 1.0, 1.0)))
3047cost is 45.00017123897305 , L_S is 28, R_0 is 0.97
3048
3049(((5.707965768311441e-06, 5.707965768311441e-06, 5.707965768311441e-06), (1.0, 1.0, -0.0), (0.0, 0.0, -0.0)), ((0.0, 1.0, -0.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 0.0), (-0.0, 1.0, -0.0), (1.0, 1.0, 1.0)))
3050cost is 54.0 , L_S is 28, R_0 is 0.99
3051
3052(((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0), (-0.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (-0.0, -0.0, 1.0), (1.0, 1.0, 1.0)))
3053cost is 45.0 , L_S is 29, R_0 is 0.95
3054
3055(((-0.0, -0.0, -0.0), (1.0, -0.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, -0.0, -0.0), (1.0, -0.0, 1.0)), ((-0.0, 1.0, -0.0), (1.0, 1.0, 1.0), (-0.0, -0.0, 1.0)))
3056cost is 54.5 , L_S is 29, R_0 is 0.97
3057
3058(((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((1.0, -0.0, -0.0), (-0.0, -0.0, -0.0), (1.0, 1.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, -0.0, -0.0), (1.0, 1.0, 1.0)))
3059cost is 55.0 , L_S is 29, R_0 is 0.99
3060
3061(((-0.0, -0.0, 1.0), (1.0, 1.0, -0.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (-0.0, -0.0, -0.0), (1.0, -0.0, 1.0)), ((-0.0, 1.0, -0.0), (1.0, 1.0, 1.0), (-0.0, 1.0, -0.0)))
3062cost is 55.0 , L_S is 30, R_0 is 0.95
3063
3064(((-0.0, -0.0, 1.0), (1.0, 1.0, -0.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, -0.0), (1.0, -0.0, -0.0), (1.0, 1.0, 1.0)))
3065cost is 55.5 , L_S is 30, R_0 is 0.97
3066
3067(((-0.0, -0.0, 1.0), (-0.0, 1.0, 1.0), (0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, 1.0, -0.0)))
3068cost is 56.0 , L_S is 30, R_0 is 0.99
3069
3070(((1.0, -0.0, -0.0), (1.0, 1.0, -0.0), (-0.0, -0.0, -0.0)), ((-0.0, -0.0, 1.0), (1.0, 1.0, 1.0), (-0.0, -0.0, -0.0)), ((1.0, 0.0, -0.0), (1.0, 1.0, -0.0), (-0.0, 1.0, 1.0)))
3071\fi
3072\subsection{Comparison with a baseline probabilistic model}
3073
3074\begin{figure}[H]
3075\centering
3076\includegraphics[scale=0.8]{KVARIATION55.pdf}
\caption{\footnotesize The series-parallel system studied in this section consists of a single type of component, with the active parallel strategy only. This simplicity is due to the limitation of the baseline probabilistic model.}
3078\label{figure1}
3079\end{figure}
3080
To illustrate the performance of our robust reliability model, we compare the design ($x^{(1)}$) obtained from the proposed robust redundancy optimization model with the design ($x^{(2)}$) obtained from a probabilistic redundancy optimization model. We choose $K = 5$, corresponding to $\lambda = 0.8$ in the previous subsection. As mentioned in the Introduction and Literature Review, when the situation involves multiple types ({\it i.e.,} $|\mathbf{M}_i|>1$), or both the cold-standby and active parallel redundant subsystems are considered, the probabilistic model generally becomes intractable. Therefore, for a fair comparison, we consider a series-parallel system with $|\mathbf{N}| = 3$ and $|\mathbf{M}_i|=1, \forall i \in [3]$, which preserves a linear MIP formulation for the probabilistic model. For a coherent exposition of the experimental study, we place the details of the probabilistic redundancy model as well as its MIP transformation in the Electronic Companion.
3082
3083\iffalse
In particular, we first randomly generate lifetime samples (size=2500) and then compute the probability levels $\P[\tilde{z}_{i}\le L_S ], \forall i \in [3]$ and the parameters $(\hnu, \underline{\hmu}, \overline{\hmu}, \bm{\hsigma}, \p)$ from the generated lifetime samples as parameter inputs of the probabilistic and robust models, respectively, where $R_0=0.95$ and $L_S=7.5$. We obtain the designs by solving the respective redundancy models. \fi We perform out-of-sample experiments in a similar way to the out-of-sample test performed in Section 5.2. We increase the out-of-sample standard deviation ($\Delta_S = 10\%, 15\%, 20\%$, respectively), and shrink the lifetime mean ($\Delta_M = 5\%, 8\%, 10\%$) at the same time. The out-of-sample system lifetimes are compared and plotted in the figure below, and the comparison of out-of-sample reliability levels is provided in the table below.
3085\begin{table}[H]\footnotesize%\small%
3086\caption{\label{tab-compare2} \footnotesize Out-of-sample reliability level comparison ($R_0=0.95$), where `Design' specifies the number of components allocated in each of 3 subsystems, and `P-Model' and `C-DRO-Model' refer to the probabilistic model and robust model, respectively. }
3087\begin{center}
3088\begin{tabular}{|c||c|c|| c| c |c |c |}\hline
3089 $\mathcal{T}_S $ & $(\Delta_{M}, \Delta_{S})$ &Model & Design & Designed reliability level &Mean of out-of-sample reliability level & StD \\
3090 \hline
3091 \multirow{19}{*}{7} & \multirow{2}{*}{(5\%, 10\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.914 & 0.280 \\
3092 && C-DRO-Model & (1,1,2) & &0.993 & 0.084 \\
3093 \cline{2-7}
3094 &\multirow{2}{*}{(5\%, 15\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.913 & 0.282 \\
3095 && C-DRO-Model & (1,1,2) & &0.990 & 0.097 \\
3096 \cline{2-7}
3097 &\multirow{2}{*}{(5\%, 20\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.908 & 0.289 \\
3098 && C-DRO-Model & (1,1,2) & &0.994 & 0.075 \\
3099
3100
3101
3102 \clineB{2-7}{2}
3103
3104 & \multirow{2}{*}{(8\%, 10\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.835 & 0.371 \\
3105 && C-DRO-Model & (1,1,2) & &0.971 & 0.167 \\
3106 \cline{2-7}
3107 &\multirow{2}{*}{(8\%, 15\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.830 & 0.37 \\
3108 && C-DRO-Model & (1,1,2) & &0.968 & 0.177 \\
3109 \cline{2-7}
3110 &\multirow{2}{*}{(8\%, 20\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.825 & 0.380 \\
3111 && C-DRO-Model & (1,1,2) & &0.968 & 0.177 \\
3112
3113 \clineB{2-7}{2}
3114 & \multirow{2}{*}{(10\%, 10\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.745 & 0.436 \\
3115 && C-DRO-Model & (1,1,2) & &0.932 & 0.252 \\
3116 \cline{2-7}
3117 &\multirow{2}{*}{(10\%, 15\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.754 & 0.431 \\
3118 && C-DRO-Model & (1,1,2) & &0.930 & 0.256 \\
3119 \cline{2-7}
3120 &\multirow{2}{*}{(10\%, 20\%)}& P-Model & (1,1,1) & \multirow{2}{*}{$R_0=0.95$}& 0.742 & 0.438 \\
3121 && C-DRO-Model & (1,1,2) & &0.946 & 0.227 \\
3122\hline
3123\end{tabular}
3124\end{center}
3125\end{table}
3126
3127\begin{figure}[H]
3128\centering
3129\includegraphics[width=\columnwidth]{Out_of_sample_def.png}
\caption{\footnotesize The out-of-sample system lifetime scenarios of the robust model with clustering and the probabilistic model under different $\Delta_{M}$ and $\Delta_{S}$. The vertical beam represents $\mathcal{T}_S $. The fraction of the lifetime histogram on the right side of the beam represents the out-of-sample reliability level.}
3131\label{figure_out_of_sample_test_def}
3132\end{figure}
3133
We can observe that the out-of-sample reliability of the design from the baseline model becomes unsatisfactory, while the design from the robust model can still keep the reliability level intact. The robust model outperforms the baseline model more as $\Delta_M$ and $\Delta_S$ increase. This illustrates that our model outperforms the baseline model in that it is significantly more robust, especially under extreme circumstances. In addition, as mentioned before, our model is tractable for multi-type mixed strategy systems, while the probabilistic model becomes intractable. Thus our model is superior in both robustness and computability.
3135
3136\subsection{Case study}
3137
In this section we present a case study of the braking system on a high-speed train, to provide an example of applying our model in practice. The braking system is a critical subsystem on the train, as a failure of the system can cost hundreds of lives. Therefore, it is a common practice to install multiple redundant braking mechanisms on the train, either cold-standby or active-parallel. In this study we model the problem as a system consisting of a single subsystem, with 3 types of braking components: MTB, pneumatic brake stop cock, and emergency brake valve. MTB components work in active-parallel, while the other two are cold-standby redundancies. Up to 2 MTBs, 8 pneumatic brake stop cocks, and 2 emergency brake valves may be installed.
3139
3140\begin{figure}[H]
3141\centering
3142\includegraphics[scale=0.6]{case-description-edited.pdf}
\caption{ The braking system in this case. It consists of 3 types of components: pneumatic brake stop cock, MTB, and emergency brake valve. MTB components work in active-parallel, while the other two are cold-standby redundancies. Only two redundant components are shown in the figure for each type, for simplicity.}
3144\label{figure1}
3145\end{figure}
3146
3147A subset of the dataset is presented below.
3148
\begin{table}[H]%\scriptsize%\footnotesize%\small%
\caption{ \footnotesize A sample of the dataset.}\label{without side information-table}
3151\begin{center}
3152\begin{tabular}{|c|c| c|c|}\hline
Type & {Speed (km/h)} & {Emergency brake valve status} & {Mileage at failure (kms)} \\
3154\hline
3155\multirow{5}{*} {Pneumatically brake stop cock} & 0 & 1 & 1205489.233 \\
3156&3.191489362 &0&1210654.084\\
3157&68.08510638&0&1212826.491\\
3158&100&1&1214834.413\\
3159&\vdots&\vdots&\vdots\\
3160\hline
3161\multirow{5}{*} {MTB} & 0 & 1 & 1209897.965\\
3162& 25.53191489 & 0 & 1213133.429\\
3163& 3.191489362 &1 & 1213997.540\\
3164& 58.5106383 & 0 & 1215082.013\\
3165&\vdots&\vdots&\vdots\\
3166
3167\hline
3168\multirow{5}{*} {Emergency brake valve} & 3.191489362 & 0 & 1210654.084\\
3169& 4.255319149 & 1 & 1212022.818\\
3170& 68.08510638 &0 & 1212826.491\\
3171& 35.10638298 & 0 & 1217498.174\\
3172&\vdots&\vdots&\vdots\\
3173\hline
3174
3175\end{tabular}
3176\end{center}
3177\end{table}
3178
In this case, lifetime is measured by the distance the train has run, instead of by time, since failure rate is more closely related to the former. We can compute the lifetime by subtracting the mileage when the component is installed from the mileage when the component fails.
3180
The speed and the binary internal status of the emergency brake valve at the point of failure are suspected to be correlated with failure rate. High speed may overload a braking system that has already accumulated much attrition. The internal status, although it does not directly tell us whether the component fails, might contain useful information about the braking system as a whole at the point of failure, like the flight recorder (``black box'') on aeroplanes. Therefore, we incorporated them as side information in the clustering process.
3182
First, we apply the cross validation algorithm to search for the ideal $K$. We choose $\lambda = 0.8$, since robustness is critical for high-speed trains. The result is shown in the figure below.
3184
3185\begin{figure}[H]
3186\centering
3187\includegraphics[width=\columnwidth]{real_case_cv.jpg}
3188\caption{\footnotesize (a) The number of violations and costs with different $K$. (b) Costs penalized by $\lambda$ with different $K$, with each line associated with a different $\lambda$.}
3189\label{figure4-2}
3190\end{figure}
3191
3192
 The results show that $K = 3, 4, 5$ are ideal. We subsequently obtain the design in the table below using $K=5$, with side information used in the clustering process. In particular, we divide the speed into 3 cases: stationary (under 20 km/h), low speed (20--50 km/h) and high speed (over 50 km/h). The valve openness is a boolean value and is naturally divided into 2 cases. Therefore, we can divide the dataset into 6 clusters based on speed and valve status. However, in our dataset there is no instance in the class of low speed and valve status 1. This leaves us with 5 clusters ($K = 5$). We then collect distributional information from each of the clusters, and obtain the design.
3194
3195\iffalse
3196\begin{table}[H]\label{without side information-table}\scriptsize%\footnotesize%\small%
3197\caption{ \footnotesize The design table for optimal clusters $K = 5$ of real case model. Cock, MTB and Emergency represents the pneumatically brake stop cocks, MTB and emergency brake valves, respectively.}
3198\begin{center}
3199\begin{tabular}{|c|c| c|| ccc || ccc || ccc || ccc |}\hline
3200\multirow{2}{*}{$\epsilon$} & \multirow{2}{*}{Subsystem} & \multirow{2}{*}{Type} & \multicolumn{3}{c||}{$R_0=0.95$,~$L_S$ (kms) }& \multicolumn{3}{c||}{$R_0=0.97$,~$L_S$ (kms)}& \multicolumn{3}{c||}{$R_0=0.98$,~$L_S$ (kms)}& \multicolumn{3}{c|}{$R_0=0.99$,~$L_S$ (kms)} \\
3201 \cline{4-15}
3202 &&&2000 & 2250 & 2500 & 2000 & 2250 &2500 & 2000 & 2250 & 2500 & 2000 & 2250 & 2500 \\
3203 \cline{1-15}
3204 && Cock & 5 & 6 &7 & 6 & 6 & 7 & 6 &7& 7 & 6 &7& 8 \\
3205$\epsilon=0.5$ & 1 & MTB & 1 & 1 &1 & 0 & 1 & 1 & 0 &0 & 1& 1 &1 & 1 \\
3206 &&Emergency & 2 & 2 &2 & 2 & 2 &2 & 2 & 2 &2 & 1 & 1 &1 \\
3207
3208\hline
3209\multicolumn{3}{|c||}{Design cost (k\$)} & 27& 31 &35&\!\! 28 &\!\! 31 &35 & 28 &32& 35 & 29 & 33 & \!\! 37 \\
3210\hline
3211\end{tabular}
3212\end{center}
3213\end{table}
3214\fi
3215
3216
\begin{table}[H]\scriptsize%\footnotesize%\small%
\caption{ \footnotesize The design table for the $K = 5$ real case model with side information (S.I.)}\label{new-table}
3219\begin{center}
3220\begin{tabular}{|c|c| c|| ccc || ccc || ccc || ccc |}\hline
3221\multirow{3}{*}{With S.I,$\epsilon$} & \multirow{3}{*}{Subsystem} & \multirow{3}{*}{Type} &\multicolumn{3}{c||}{$R_0=0.95$ }& \multicolumn{3}{c||}{$R_0=0.97$}& \multicolumn{3}{c||}{$R_0=0.98$}& \multicolumn{3}{c|}{$R_0=0.99$} \\
3222\cline{4-15}
3223 &&&\multicolumn{3}{c||}{~$\mathcal{T}_S $ (kms) }& \multicolumn{3}{c||}{~$\mathcal{T}_S $ (kms)}& \multicolumn{3}{c||}{~$\mathcal{T}_S $ (kms)}& \multicolumn{3}{c|}{~$\mathcal{T}_S $ (kms)}\\
3224
3225 &&&2000 & 2250 & 2500 & 2000 & 2250 &2500 & 2000 & 2250 & 2500 & 2000 & 2250 & 2500 \\
3226 \cline{1-15}
3227 && Cock & 5 & 6 &7 & 5 & 6 & 7 & 6 &6& 7 & 6 &6& 7 \\
3228 $\epsilon=0.025$& 1 & MTB& 1 & 1 &1 & 1 & 1 & 1 & 0 &1 & 1& 1 &1 & 1 \\
3229 &&Emergency & 2 & 2 &2 & 2 & 2 &2 & 2 & 2 &2 & 1 & 2 &2 \\
3230
3231\hline
3232\multicolumn{3}{|c||}{Design cost (k\$)} & 27& 31 &35&\!\! 27 &\!\! 31 &35 & 28 &31& 35 & 29 & 31 & \!\! 35 \\
3233
3234 \cline{1-15}
3235 && Cock& 5 & 6 &7 & 6 & 6 & 7 & 6 &7& 7 & 6 &7& 8 \\
3236 $\epsilon=0.05$& 1 & MTB & 1 & 1 &1 & 0 & 1 & 1 & 0 &1 & 1& 1 &1 & 0 \\
3237 &&Emergency & 2 & 2 &2 & 2 & 2 &2 & 2 & 2 &2 & 1 & 2 &2 \\
3238
3239\hline
3240\multicolumn{3}{|c||}{Design cost (k\$)} & 27& 31 &35&\!\! 28 &\!\! 31 &35 & 28 &32& 35 & 29 & 33 & \!\! 36 \\
3241
3242 \cline{1-15}
3243 && Cock& 6 & 6 &7 & 6 & 7 & 7 & 6 &7& 8 & 7 &7& 8 \\
3244 $\epsilon=0.075$& 1 & MTB & 0 & 1 &1 & 1 & 0 & 1 & 1 &1 & 0& 0 &1 & 1 \\
3245 &&Emergency & 2 & 2 &2 & 1 & 2 &2 & 1 & 1 &2 & 1 & 1 &1 \\
3246
3247\hline
3248\multicolumn{3}{|c||}{Design cost (k\$)} & 28& 31 &35&\!\! 29 &\!\! 32 &35 & 29 &33& 36 & 30 & 33 & \!\! 37 \\
3249\hline
3250\end{tabular}
3251\end{center}
3252\end{table}
3253
We can observe that as the lifetime requirement $\mathcal{T}_S $, the level of robustness $R_0$, or the dispersion parameter $\epsilon$ increases, the cost of the design generally increases. This is due to either extra components being installed (for example, when $\mathcal{T}_S $ increases from 2000 to 2250 under $R_0 = 0.95$, $\epsilon = 0.05$, an extra cock is installed), or a cheaper but less robust component being replaced by a more expensive but more robust one (for example, when $R_0$ increases from 0.95 to 0.97 under $\mathcal{T}_S = 2000$, $\epsilon = 0.05$, the single MTB is replaced by a cock).
3255
3256\section{Conclusions}{\color{red}
The redundancy allocation problem (RAP) is a critical problem in the field of reliability engineering. Distributionally robust optimization (DRO) models have been employed to achieve high robustness in RAP. We present a framework that combines clustering and dimension reduction in DRO models, to achieve a smaller ambiguity set, which helps alleviate the problem that DRO models can sometimes be over-conservative. This framework can incorporate side information to improve the ambiguity set, or, in the absence of side information, use clustering to discover and utilize underlying structures of the data. We develop a mixed integer linear program (MILP) reformulation of the model without adding additional integer variables, thus keeping the model tractable. We also devise a supergradient-based algorithm to decompose the MILP formulation, so that the model can be solved by multiple computers in parallel, which makes the model practically viable even with a large number of clusters.
3258}
3259
3260\begin{thebibliography}{}
3261
\bibitem{Ardakan2014}Ardakan M. A., A. Z. Hamadani, M. Alinaghian. Optimizing bi-objective redundancy allocation problem with a mixed redundancy strategy. {\em ISA transactions}~{\bf 55}:116--128.
3263
3264\bibitem{Bertsimas2011}Bertsimas D., M. Sim. 2004. The price of robustness. {\em Operations Research}~{\bf 52}~(1):35--53.
3265
3266\bibitem{Bhunia2010} Bhunia, A. K., L. Sahoo, D. Roy. 2010. Reliability stochastic optimization
3267for a series system with interval component reliability via genetic
3268algorithm. {\em Appl.Math. Computat.}~{\bf 216}~(3): 929--939, 2010.
3269
3270
3271\bibitem{Chern1992}Chern, M.S. 1992. On the computational complexity of reliability redundancy allocation in a series system. {\em Operations research letters}~{\bf 11}~(5):309--315.
3272
3273\bibitem{Cheng2009}Cheng, Z., X. Wang, C. Tian, F. Wang. 2009. Mission reliability simulation of High-speed EMU service braking system. {\em Proceedings of the 8th International Conference on Reliability}, Maintainability and Safety (ICRMS 2009), 253--256.
3274
3275\bibitem{Coit1998}Coit, D. W., A.E. Smith. 1998. Redundancy allocation to maximize a lower percentile of the system time-to-failure distribution. {\em IEEE Trans. Rel.}~ {\bf 47}~(1):79--87.
3276
3277\bibitem{Coit2003}Coit, D. W. 2003. Maximization of system reliability with a choice of redundancy strategies. {\em IIE Transactions}~{\bf 35}~(6):535-543.
3278
3279\bibitem{Coit2004} Coit, D.W., T. Jin, N. Wattanapongsakorn. 2004. System optimization
3280with component reliability estimation uncertainty: A multi-criteria approach. {\em IEEE Trans. Rel.}~ {\bf 53}~(3) : 369--380, 2004.
3281
3282
3283
3284
3285\bibitem{Elsayed2012}Elsayed, E A. 2012. {\em Reliability Engineering}. 2nd Edition. Wiley.
3286
3287
3288\bibitem{Govindan2017}Govindan, K., A. Jafarian, M.E. Azbari, T.M. Choi. 2017. Optimal bi-objective redundancy allocation for systems reliability and risk management. {\em IEEE Transactions on Cybernetics}~{\bf 46}~(8):1735--1748.
3289
3290
3291
3292%\bibitem{Lam2012}Lam SW, T.S. Ng, and M. Sim. (2012). Multiple objectives satisficing under uncertainty. To appear in Operations Research, 2012.
3293
3294%\bibitem{Lin2011}Lin J, Muthuraman K, Lawley M (2011) Optimal and approximate algorithms for sequential clinical scheduling with no-shows. {\it IIE Transactions on Healthcare Systems Engineering} 1:20--36.
3295
3296 %\bibitem{McCarthy2000} McCarthy K, McGee HM, O'Boyle CA. 2000. Outpatient clinic waiting times and non-attendance as indicators of quality. {\it Psychology, Health and Medicine} 5: 287--293.
3297
3298\bibitem{Grani2017} Hanasusanto, G. A., V. Roitch, D. Kuhn, W. Wiesemann. 2017. Ambiguous joint chance constraints under mean and dispersion information. {\it Operations Research}~{\bf 65}~(3):715--767.
3299
3300
3301
3302
3303
3304\bibitem{Elegbede2003}Elegbede, A.C., C. Chu, K.H. Adjallah, F. Yalaoui. 2003. Reliability allocation through cost minimization. {\em IEEE Transactions on reliability}~{\bf 52}~(1):106--111.
3305
3306\bibitem{Feizollahi2012} Feizollahi, M.J., M. Modarres. 2012. The robust deviation redundancy allocation problem with interval component reliabilities. {\em IEEE Transactions on reliability}~{\bf 61}~(4):957--965.
3307
3308
3309
3310
3311
3312\bibitem{Feizollahi2014}Feizollahi, M.J., S. Ahmed, M. Modarres. 2014. The robust redundancy allocation problem in series-parallel systems with budgeted uncertainty. {\em IEEE Transactions on reliability}~{\bf 63}~(1):239--250.
3313
3314\bibitem{Feizollahi2015} Feizollahi, M.J., R. Soltan, H. Feyzollahi. 2015. The robust cold standby redundancy allocation in series-parallel systems with budgeted uncertainty. {\em IEEE Transactions on reliability}~{\bf 64}~(2):799--806.
3315
3316\bibitem{Friedman2001} Friedman, J., T. Hastie, R. Tibshirani. 2001. {\em The elements of statistical learning.} Springer series in statistics. New York.
3317
3318\bibitem{Hasegawa1999}Hasegawa, I., Uchida, S. 1999. Braking systems. {\em Japan Railway and Transport Review}~{\bf 20}:52-59.
3319
3320\bibitem{James2013} James, G., D. Witten, T. Hastie, R. Tibshirani. 2013. {\em An introduction to statistical learning.} Springer series in statistics. New York.
3321
3322\bibitem{Ketchen1996}Ketchen, Jr. D.J., C.L. Shook, 1996. The application of cluster analysis in Strategic Management Research: An analysis and critique. {\em Strategic Management Journal}~{\bf 17}~(6):441-458.
3323
3324\bibitem{Kuo2001}Kuo, W., V.R. Prasad, F.A. Tillman, C.L. Hwang. 2001. {\em Optimal Reliability Design: Fundamentals and Applications.} Cambridge university press. Cambridge.
3325
3326\bibitem{Kuo2007}Kuo, W., R. Wan. 2007. Recent advances in optimal reliability allocation. {\em IEEE Transactions on Systems, Man, and Cybernetics-Part A: Systems and Humans}~{\bf 37}~(2):143-156.
3327
3328\bibitem{Li2014}Li, Y.F., Y. Ding, E. Zio. 2014. Random fuzzy extension of the universal generating function approach for the
3329reliability assessment of multi-state systems under aleatory and epistemic uncertainties. {\em IEEE Transactions on Reliability}~{\bf 63}~(1):13--25.
3330
3331\bibitem{Li2011}Li, C.Y., X. Chen, X.S. Yi, J.Y. Tao. 2011. Interval-valued reliability analysis of multi-state systems. {\em IEEE Transactions on Reliability}~{\bf 60}~(1):323--330.
3332
3333\bibitem{Li2008} Li, X., X. Hu. 2008. Some new stochastic comparisons for redundancy
3334allocations in series and parallel systems.~{\em Statist. Probabil. Lett.}~{\bf 78}~(18): 3388--3394.
3335
3336
3337
3338\bibitem{Liao2014}Liao, L., F. K\"{o}ttig. 2014. Review of hybrid prognostics approaches for remaining useful life prediction of engineered systems, and an application to battery life prediction. {\em IEEE Transactions on Reliability}. {\bf 63}~(1):191--207.
3339
3340
3341
3342
3343\bibitem{MacQueen1967} MacQueen, J. 1967. Some methods for classification and analysis of multivariate observations. {\em Proceedings of the Fifth Berkeley Symposium on Mathematical Statistics and Probability} {\bf 1}~(Statistics):281--297.
3344
3345
3346
\bibitem{Marseguerra2005} Marseguerra, M., E. Zio, L. Podofillini, D. W. Coit. 2005. Optimal design of reliable network systems in presence of uncertainty. {\em IEEE Trans Rel.}~{\bf 54}~(2):243--253.
3348
3349\bibitem{Military1992}Military, U.S. 1992. Reliability prediction of electronic equipment. MIL-HDBK-217F Notice 1.
3350
3351\bibitem{Ng2002} Ng, A. Y, M. I. Jordan, Y. Weiss. 2002. On spectral clustering: Analysis and an algorithm. {\em Advances in neural information processing systems}:849-856.
3352
3353\bibitem{Ng2014} Ng, S. Y., Y. Xing, K. L. Tsui. 2014. A naive Bayes model for robust remaining useful life prediction of lithium-ion battery. {\em Applied Energy}~{\bf 118}: 114-123.
3354
3355
3356
3357
3358\bibitem{Prasad2001}Prasad, V. R., W. Kuo, K. O. Kim. 2001. Maximization of a percentile life of a series system through component redundancy allocation. {\em IIE Transactions}~{\bf 33}~(12):1071--1079.
3359
3360\bibitem{Pecht2008} Pecht, M. 2008. {\em Prognostics and Health Management of Electronics.} John Wiley \& Sons, Ltd.
3361
3362\bibitem{Quinlan1986}Quinlan, J. R. 1986. Induction of decision trees. {\em Machine Learning}~{\bf 1}: 81--106.
3363
3364
3365\bibitem{Shapiro2001}Shapiro~A.~2001.~On duality theory of conic linear problems. In {\em Semi-Infinite Programming}, chapter 7, 135--165, Kluwer Academic Publishers, 2001.
3366
3367\bibitem{Sibson1973}Sibson R. 1973. SLINK: an optimally efficient algorithm for the single-link cluster method. {\em The Computer Journal. British Computer Society}~{\bf 16}~(1):30-34.
3368
3369\bibitem{Soltani2015}Soltani R., J. Safari, S.J. Sadjadi. 2015. Robust counterpart optimization for the redundancy allocation problem in series-parallel systems with component mixing under uncertainty. {\em Applied Mathematics \& Computation}~{\bf 271}~(C): 80--88.
3370
3371
3372
3373
3374\bibitem{Sun2017} Sun, M. X., Y. F. Li, E. Zio. 2017. On the optimal redundancy allocation for multi-state series-parallel systems under epistemic uncertainty. {\em Reliability Engineering \& System Safety}. Accepted.
3375
3376\bibitem{Tang2014}Tang, S., C. Yu, X. Wang, X. Guo, X. Si. 2014. Remaining useful life prediction of lithium-ion batteries based on the wiener process with measurement error. {\em Energies}~{\bf 7}~(2):520--547.
3377
3378\bibitem{Tekiner-Mogulkoc2011}Tekiner-Mogulkoc, H., D. W. Coit. 2011. System reliability optimization
3379considering uncertainty: Minimization of the coefficient of variation
3380for series-parallel systems.~{\em IEEE Trans. Rel.}~{\bf 60}~(30): 667--674, 2011.
3381
3382\bibitem{Thorndike1953}Thorndike R. L. 1953. Who Belongs in the Family?. {\em Psychometrika}~{\bf 18}~(4):267-276.
3383
3384\bibitem{Wang2012} Wang, Y., L. Li, S. Huang, Q. Chang. 2012. Reliability and covariance estimation of weighted k-out-of-n multi-state Systems. {\em European Journal of Operational Research}~{\bf 221}:~138--147.
3385
3386{\color{red} \bibitem{Wang2019} Wang, S.,...}
3387
3388\bibitem{Wisemann2014} Wiesemann, W., D. Kuhn, M. Sim. 2014. Distributionally robust convex optimization. {\it Operations Research}~{\bf 62} ~(6)~ 1358--1376.
3389
3390\bibitem{xie2017} Xie, W., Ahmed, S. 2017. Distributionally robust chance constrained optimal power flow with renewables: A conic reformulation. {\em IEEE Transactions on Power Systems.} Accepted.
3391
3392
3393\bibitem{Yalaoui2005}Yalaoui, A., E. Chatelet, C. Chu. 2005. A new dynamic programming method for reliability redundancy allocation in a parallel-series system. {\em IEEE transactions on reliability}.~{\bf 54}~(2):254--261.
3394
3395
3396\bibitem{Zaretalab2015}Zaretalab, A., V. Hajipour, M. Sharifi, M. R. Shahriari. 2015. A knowledge-based archive multi-objective simulated annealing algorithm to optimize series-parallel system with choice of redundancy strategies. {\em Computers \& Industrial Engineering}~{\bf 80}:33-44.
3397
3398\bibitem{Zhao2003} Zhao, R., B. Liu. 2003. Stochastic programming models for general redundancy-optimization problems.~{\em IEEE Trans. Rel.}~{\bf 52}~(2): 181--191, 2003.
3399
3400\bibitem{Zhao2011} Zhao, P., P.S. Chan, H.K.T. Ng. 2011. Optimal allocation of redundancies in series systems. {\em European Journal of Operational Research}~{\bf 220}~(3):673--683.
3401
3402\bibitem{Kuhn2013}Zymler, S., D. Kuhn, B. Rustem. 2013. Distributionally robust joint chance constraints with second-order moment information, {\em Mathematical Programming}~{\bf 137}~(1-2):167--198.
3403\end{thebibliography}
3404\newpage
3405\section*{Appendix: A benchmark probabilistic reliability model}
3406As a benchmark for fair comparison, we consider the following probabilistic reliability model
3407\begin{eqnarray*}
3408\begin{array}{rcll}
3409& \min\limits_{\y} & \sum\limits_{i\in \mathbf{N}}y_{i}c_{i} \\[0.3 cm]
3410& {\rm s.t.} & \displaystyle \prod_{i \in \mathbf{N}}\left(1-r_{i}^{y_{i}}\right)\ge R_0 & \\[0.3 cm]
3411&& L_{i}\le y_{i}\le U_{i}, & \forall i \in \mathbf{N}\\
3412&& y_{i} \in \mathbb{Z}_+, & \forall i \in \mathbf{N},
3413\end{array}
3414\end{eqnarray*}
3415where each subsystem is equipped with one type of components (i.e., $|\mathbf{M}_i|\equiv 1, \forall i \in \mathbf{N}$ ), and
3416$$
3417r_{i}=\P\Big[\tilde{z}_{i}\le \mathcal{T}_R \Big],
3418$$
3419which can be estimated from the data. By transforming the integer variable $y_{i}$ with binaries $x_{ik}$:
3420$$
3421y_{i}=L_{i}+\sum_{k=0}^{U_{i}-L_{i}}kx_{ik},~\mbox{with}~\sum_{k=0}^{U_{i}-L_{i}}x_{ik}=1,
3422$$
3423the above model can be linearized as the following MIP with binaries (Feizollahi and Modarres~2012):
3424\begin{eqnarray}
3425\begin{array}{rcll}
3426& \min\limits_{\x} & \displaystyle \sum\limits_{i\in \mathbf{N}} \left[L_{i}+\sum_{k=0}^{U_{i}-L_{i}}kx_{ik}\right]c_{i} \\[0.3 cm]
3427& {\rm s.t.} & \displaystyle \sum_{i \in \mathbf{N}} \sum_{k=0}^{U_{i}-L_{i}}x_{ik}\ln\left[1-r_{i}^{L_{i}+k} \right]\ge \ln R_0 & \\[0.3 cm]
3428&& \displaystyle \sum_{k=0}^{U_{i}-L_{i}}x_{ik}=1, & \forall i \in \mathbf{N}\\
3429&& x_{ik} \in \{0,1\}, & \forall i \in \mathbf{N}, k \in [0; U_{i}-L_{i}],
3430\end{array}
3431\end{eqnarray}
which can be solved by off-the-shelf MIP solvers. Nevertheless, the above linear MIP transformation holds only for regular series-parallel redundant systems with a single type of components; when multiple types are involved (i.e. $|\mathbf{M}_i|>1$ for some $i \in \mathbf{N}$) or cold-standby subsystems are considered, the probabilistic model in general becomes intractable.
3433
3434%\section*{Appendix II}
3435%\begin{proposition}\label{P-proposition1b}
3436%Given a system design $\x$, the worst-case probabilistic chance function (\ref{Prob-1}) solves the following linear program (LP):
3437%\begin{eqnarray}
3438%&\!\!\!\!\!\! \max & 1- \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left(\alpha^{k}_{ij}\underline{\mu}_{{ij}}+ \beta^{k}_{ij}\overline{\mu}_{{ij}}\right)-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\epsilon_{ij}\lambda_{ij} - \tau \label{HP1-ambiguity-LP-FLaa} \\
3439% &\!\!\!\!\!\!{\rm s.t.} & \sum\limits_{j\in \mathbf{M}^{\rm p}_{l}}\sum\limits_{k\in \mathbf{K}_{lj}} q_{ljk}\mathcal{T}_R\nonumber\\
3440% &&+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[ \phi^{l }^{k}_{ij}\underline{z}_{ij}+ \varphi^{l}^{k}_{ij}\overline{z}_{ij} + {\nu_{ij}\left(\pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} \right)} \right]+\tau \ge 1,~\forall {l \in [3;5]} \label{HP1-ambiguity-LP-FL1aa}\\
3441% &&p_{l}\mathcal{T}_R+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[ \phi^{l }^{k}_{ij}\underline{z}_{ij}+ \varphi^{l}^{k}_{ij}\overline{z}_{ij} + {\nu_{ij}\left(\pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} \right)} \right]+\tau \ge 1,~\forall {l \in [1;2]}\\
3442% && \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[ \varsigma^{k}_{ij}\underline{z}_{ij}+ \vartheta^{k}_{ij}\overline{z}_{ij} + {\nu_{ij}\left(\gamma^{k}_{ij}-\theta^{k}_{ij} \right)}\right]+\tau \ge 0\label{HP1-ambiguity-LP-FL1-2aa}\\
3443% && q_{l jk}x_{l jk} +\phi^{l }_{l jk}+\varphi^{l }_{l jk}+ { \pi^{l }_{l jk}-\varpi^{l }_{l jk} } = \alpha_{l jk}+\beta_{l jk},~\forall {l \in [3;5]}, j \in \mathbf{M}^{\rm p}_l, k\in \mathbf{K}_{l j} \\
3444%&& p_{l}x_{l jk} +\phi^{l }_{l jk}+\varphi^{l }_{l jk}+ { \pi^{l }_{l jk}-\varpi^{l }_{l jk} } = \alpha_{l jk}+\beta_{l jk},~\forall {l \in [1;2]}, j \in \mathbf{M}^{\rm c}_l, k\in \mathbf{K}_{l j} \\
3445%&&\phi^{l }^{k}_{ij}+\varphi^{l }^{k}_{ij}+ { \pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} }= \alpha^{k}_{ij}+\beta^{k}_{ij}, ~ \forall {l \in \mathbf{N}}, i \in \mathbf{N}\setminus\{l \}, j \in \mathbf{M}_i, k\in \mathbf{K}_{ij} \label{HP1-ambiguity-LP-FL2aa}\\
3446%&&{|\mathbf{K}_{ij} |\sigma_{ij}} (\pi^{l }^{k}_{ij}+\varpi^{l }^{k}_{ij}) =\lambda_{ij}, ~ \forall {l \in \mathbf{N}}, i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
3447%&& \varsigma^{k}_{ij}+ \vartheta^{k}_{ij} + { \gamma^{k}_{ij}-\theta^{k}_{ij} }= \alpha^{k}_{ij}+\beta^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
3448%&& {|\mathbf{K}_{ij} |\sigma_{ij}}(\gamma^{k}_{ij}+\theta^{k}_{ij}) = \lambda_{{ij}}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
3449%%&& q_{l jk}\le y_{{l jk}}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
3450%%&& y_{{l jk}} \ge M x_{l jk}, ~\forall l \in \mathbf{N}, j \in \mathbf{M}_{l}, k\in \mathcal{N}(l,j) \\[0.3 cm]
3451%%&& y_{{l jk}} \le q_{l jk }+(x_{l jk}-1)M, ~\forall l \in \mathbf{N}, j \in \mathcal{J}({l}), k \in \mathcal{N}(l,j)\\[0.3 cm]
3452%%&& \sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\ge L_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i \\
3453%&& \alpha^{k}_{ij}\le 0, \beta^{k}_{ij}\ge 0, \lambda_{{ij}} \ge 0, \tau \in \mathbb{R}, ~\forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
3454%&&q_{l jk}\le 0, ~\forall {l \in [3;5]}, j\in \mathbf{M}^{\rm p}_{l}, k \in \mathbf{K}_{lj}\\
3455%&&p_{l}\le 0, ~\forall {l \in [1;2]}\\
3456%&& \phi^{l }^{k}_{ij} \ge 0, \varphi^{l }^{k}_{ij} \le 0, \pi^{l }^{k}_{ij}\ge 0,\varpi^{l }^{k}_{ij}\ge 0, ~\forall {l \in \mathbf{N}}, i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
3457%&& \theta^{k}_{ij}\ge 0, \gamma^{k}_{ij}\ge 0, \varsigma^{k}_{ij}\ge 0, \vartheta^{k}_{ij} \le 0, ~\forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}, \label{HP2-ambiguity-LP-FLaa}
3458%\end{eqnarray}
3459%where $\halpha, \hbeta, \hlambda, \tau, \q, \s, \hphi, \hvarphi, \hpi, \hvarpi, \htheta, \hgamma, \hvarsigma$ and $\hvartheta$ are auxiliary variables.
3460%\end{proposition}
3461%
3462%\begin{proposition}\label{proposition1b}
3463%The robust system reliability redundancy allocation problem (\ref{HP1-ambiguity-X}) can be cast into the following mixed integer linear program (MILP):
3464%\begin{eqnarray}
3465% & \min\limits_{\x} & \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \left[\sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\right]c_{ij} \label{HP1-ambiguity-MILP-FL1aa}\\
3466% &{\rm s.t.} & 1- \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left(\alpha^{k}_{ij}\underline{\mu}_{{ij}}+ \beta^{k}_{ij}\overline{\mu}_{{ij}}\right)-\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i}\epsilon_{ij}\lambda_{ij} - \tau\ge R_{0} \\
3467% && \sum_{j\in \mathbf{M}_i}\sum\limits_{k\in \mathbf{K}_{ij}} x^{k}_{ij}\ge L_{i}, ~ \forall i \in \mathbf{N} \\
3468%%&& \sum\limits_{j\in \mathbf{M}_{l}} \sum\limits_{k\in \mathbf{K}_{l j}}q_{l jk}\mathcal{T}_R\nonumber\\
3469%% &&+\sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[ \phi^{l }^{k}_{ij}\underline{z}_{ij}+ \varphi^{\varsigma}^{k}_{ij}\overline{z}_{ij} + {\nu_{ij}\left(\pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} \right)} \right]+\tau \ge 1,~\forall {l \in \mathbf{N}} \\
3470%&& y^{\rm p}_{l jk} +\phi^{l }_{l jk}+\varphi^{l }_{l jk}+ { \pi^{l }_{l jk}-\varpi^{l }_{l jk} } = \alpha_{l jk}+\beta_{l jk},~\forall {l \in [3;5]}, j \in \mathbf{M}^{\rm p}_{l}, k\in \mathbf{K}_{l j} \\
3471%&& y^{\rm c}_{l jk} +\phi^{l }_{l jk}+\varphi^{l }_{l jk}+ { \pi^{l }_{l jk}-\varpi^{l }_{l jk} } = \alpha_{l jk}+\beta_{l jk},~\forall {l \in [1;2]}, j \in \mathbf{M}^{\rm c}_{l}, k\in \mathbf{K}_{l j} \\
3472% && (\ref{HP1-ambiguity-LP-FL1aa}-\ref{HP1-ambiguity-LP-FL1-2aa}); (\ref{HP1-ambiguity-LP-FL2aa})-(\ref{HP2-ambiguity-LP-FLaa})\\
3473%%&&\phi^{l }^{k}_{ij}+\varphi^{l }^{k}_{ij}+ { \pi^{l }^{k}_{ij}-\varpi^{l }^{k}_{ij} }= \alpha^{k}_{ij}+\beta^{k}_{ij}, ~ \forall {l \in \mathbf{N}}, i \in \mathbf{N}\setminus\{l \}, j \in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
3474%%&&{|\mathbf{K}_{ij} |\sigma_{ij}} (\pi^{l }^{k}_{ij}+\varpi^{l }^{k}_{ij}) =\lambda_{ij}, ~ \forall {l \in \mathbf{N}}, i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
3475%%&& \sum\limits_{i\in \mathbf{N}}\sum\limits_{j\in \mathbf{M}_i} \sum\limits_{k\in \mathbf{K}_{ij}} \left[\left(\varsigma^{k}_{ij}\underline{z}_{ij}+ \vartheta^{k}_{ij}\overline{z}_{ij} \right) + {\nu_{ij}\left(\gamma^{k}_{ij}-\theta^{k}_{ij} \right)}\right]+\tau \ge 0\\
3476%%&& \varsigma^{k}_{ij}+ \vartheta^{k}_{ij} + { \gamma^{k}_{ij}-\theta^{k}_{ij} }= \alpha^{k}_{ij}+\beta^{k}_{ij}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
3477%%&& {|\mathbf{K}_{ij} |\sigma_{ij}}(\gamma^{k}_{ij}+\theta^{k}_{ij}) = \lambda_{{ij}}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij} \\
3478%&& q_{l jk}\le y^{\rm p}_{{l jk}}, ~\forall l \in [3;5], j \in \mathbf{M}^{\rm p}_{l}, k\in \mathbf{K}_{l j} \\
3479%&& y^{\rm p}_{{l jk}} \ge M_1 x_{l jk}, ~\forall l \in [3;5], j \in \mathbf{M}^{\rm p}_{l}, k\in \mathbf{K}_{l j} \\
3480%&& y^{\rm p}_{{l jk}} \le q_{l jk}+(x_{l jk}-1)M_1, ~\forall l \in [3;5], j \in \mathbf{M}^{\rm p}_{l}, k \in \mathbf{K}_{l j}\\
3481%&& p_{l}\le y^{\rm c}_{{l jk}}, ~\forall l \in [1;2], j \in \mathbf{M}^{\rm c}_{l}, k\in \mathbf{K}_{l j} \\
3482%&& y^{\rm c}_{{l jk}} \ge M_2 x_{l jk}, ~\forall l \in [1;2], j \in \mathbf{M}^{\rm c}_{l}, k\in \mathbf{K}_{l j} \\
3483%&& y^{\rm c}_{{l jk}} \le p_{l }+(x_{l jk}-1)M_2, ~\forall l \in [1;2], j \in \mathbf{M}^{\rm c}_{l}, k \in \mathbf{K}_{l j}\\
3484%%&& \alpha^{k}_{ij}\le 0, \beta^{k}_{ij}\ge 0, \lambda_{{ij}} \ge 0, \tau \in \Re, ~\forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
3485%%&&q_{l jk}\le 0, y_{l jk} \le 0, ~\forall {l \in \mathbf{N}}, j\in \mathbf{M}_{l}, k\in \mathbf{K}_{l j} \\
3486%%&& \phi^{l }^{k}_{ij} \ge 0, \varphi^{l }^{k}_{ij} \le 0, \pi^{l }^{k}_{ij}\ge 0,\varpi^{l }^{k}_{ij}\ge 0, ~\forall {l \in \mathbf{N}}, i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
3487%%&& \theta^{k}_{ij}\ge 0, \gamma^{k}_{ij}\ge 0, \varsigma^{k}_{ij}\ge 0, \vartheta^{k}_{ij} \le 0, ~\forall i \in \mathbf{N}, j\in \mathbf{M}_i, k\in \mathbf{K}_{ij}\\
3488%&& y^{\rm p}_{{l jk}}\le 0, ~ \forall l \in [3;5], j\in \mathbf{M}^{\rm a}_i, k \in \mathbf{K}_{l j}\\
3489%&& y^{\rm c}_{{l jk}}\le 0, ~ \forall l \in [1;2], j\in \mathbf{M}^{\rm c}_i, k \in \mathbf{K}_{l j}\\
3490%&& x^{k}_{ij} \in \{0,1\}, ~ \forall i \in \mathbf{N}, j\in \mathbf{M}_i, k \in \mathbf{K}_{ij}, \label{HP1-ambiguity-MILP-FL2aa}
3491%\end{eqnarray}
3492%where $\halpha, \hbeta, \hlambda, \tau, \q, \s, \y^{\rm p}, \y^{\rm c}, \hphi, \hvarphi, \hpi, \hvarpi, \htheta, \hvartheta, \hvarsigma$ and $\hvartheta$ are auxilary variables and $M$ is a sufficiently small negative number.
3493%\end{proposition}
3494
3495\end{document}
3496%%
3497%% E nd of file `elsarticle-template-num.tex'.