· 6 years ago · Sep 06, 2019, 05:26 AM
1{
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "<h2><b>Travel Datathon 2019 Assessment</b></h2>\n",
8 "\n",
9 "Participants:<br>\n",
10 " Hasan Alp Boz, Sabancı University<br>\n",
11 " Mert Gürkan, Sabancı University<br>"
12 ]
13 },
14 {
15 "cell_type": "markdown",
16 "metadata": {},
17 "source": [
18 "<h4>Importing Packages and Initialization</h4>"
19 ]
20 },
21 {
22 "cell_type": "code",
23 "execution_count": 74,
24 "metadata": {},
25 "outputs": [],
26 "source": [
27 "# If you need to install XGBoost in your environment, uncomment the line below\n",
28 "# !pip install xgboost"
29 ]
30 },
31 {
32 "cell_type": "code",
33 "execution_count": 33,
34 "metadata": {},
35 "outputs": [],
36 "source": [
37 "import pandas as pd\n",
38 "import matplotlib.pyplot as plt\n",
39 "import seaborn as sns\n",
40 "import numpy as np\n",
41 "from scipy import stats\n",
42 "from sklearn.model_selection import train_test_split\n",
43 "from sklearn.metrics import mean_squared_error\n",
44 "from sklearn.metrics import r2_score\n",
45 "\n",
46 "import os\n",
47 "os.environ['KMP_DUPLICATE_LIB_OK']='True'\n",
48 "\n",
49 "import xgboost as xgb\n",
50 "from sklearn.ensemble import RandomForestRegressor\n",
51 "\n",
52 "import warnings\n",
53 "warnings.filterwarnings('ignore')"
54 ]
55 },
56 {
57 "cell_type": "code",
58 "execution_count": 2,
59 "metadata": {},
60 "outputs": [],
61 "source": [
62 "# Please change the path of the .csv file to the location of it in your system\n",
63 "df = pd.read_csv(r\"C:\\Users\\Mert\\Desktop\\assessment\\assessment\\Assessment Data\\Assessment Train Data.csv\")"
64 ]
65 },
66 {
67 "cell_type": "markdown",
68 "metadata": {},
69 "source": [
70 "<h2>Data Exploration</h2>"
71 ]
72 },
73 {
74 "cell_type": "code",
75 "execution_count": 3,
76 "metadata": {},
77 "outputs": [
78 {
79 "data": {
80 "text/html": [
81 "<div>\n",
82 "<style scoped>\n",
83 " .dataframe tbody tr th:only-of-type {\n",
84 " vertical-align: middle;\n",
85 " }\n",
86 "\n",
87 " .dataframe tbody tr th {\n",
88 " vertical-align: top;\n",
89 " }\n",
90 "\n",
91 " .dataframe thead th {\n",
92 " text-align: right;\n",
93 " }\n",
94 "</style>\n",
95 "<table border=\"1\" class=\"dataframe\">\n",
96 " <thead>\n",
97 " <tr style=\"text-align: right;\">\n",
98 " <th></th>\n",
99 " <th>Departure_YMD_LMT</th>\n",
100 " <th>Operation_YMD_LMT</th>\n",
101 " <th>Departure_Airport</th>\n",
102 " <th>Operation_Airport</th>\n",
103 " <th>Terminal_Number</th>\n",
104 " <th>Terminal_Name</th>\n",
105 " <th>Operation_Initials</th>\n",
106 " <th>Operation_Sonic_Code</th>\n",
107 " <th>Operation_Channel</th>\n",
108 " <th>Passenger_Title</th>\n",
109 " <th>...</th>\n",
110 " <th>SWC_FLY</th>\n",
111 " <th>Cabin_Class</th>\n",
112 " <th>SWC_FQTV_Member</th>\n",
113 " <th>Passenger_Baggage_Count</th>\n",
114 " <th>Passenger_Baggage_Weight</th>\n",
115 " <th>SWC_Staff</th>\n",
116 " <th>SWC_CIP_Passenger</th>\n",
117 " <th>SWC_VIP_Passenger</th>\n",
118 " <th>SWC_Has_Infant</th>\n",
119 " <th>Operation_Count</th>\n",
120 " </tr>\n",
121 " </thead>\n",
122 " <tbody>\n",
123 " <tr>\n",
124 " <th>0</th>\n",
125 " <td>20190326</td>\n",
126 " <td>20190326</td>\n",
127 " <td>KDT</td>\n",
128 " <td>KDT</td>\n",
129 " <td>?</td>\n",
130 " <td>032363</td>\n",
131 " <td>KS</td>\n",
132 " <td>?</td>\n",
133 " <td>QC</td>\n",
134 " <td>MISTER</td>\n",
135 " <td>...</td>\n",
136 " <td>1</td>\n",
137 " <td>Y</td>\n",
138 " <td>1</td>\n",
139 " <td>0</td>\n",
140 " <td>0</td>\n",
141 " <td>0</td>\n",
142 " <td>1</td>\n",
143 " <td>0</td>\n",
144 " <td>0</td>\n",
145 " <td>1</td>\n",
146 " </tr>\n",
147 " <tr>\n",
148 " <th>1</th>\n",
149 " <td>20180321</td>\n",
150 " <td>20180321</td>\n",
151 " <td>KDT</td>\n",
152 " <td>KDT</td>\n",
153 " <td>?</td>\n",
154 " <td>03F960</td>\n",
155 " <td>KS</td>\n",
156 " <td>?</td>\n",
157 " <td>QC</td>\n",
158 " <td>MISTER</td>\n",
159 " <td>...</td>\n",
160 " <td>1</td>\n",
161 " <td>Y</td>\n",
162 " <td>1</td>\n",
163 " <td>0</td>\n",
164 " <td>0</td>\n",
165 " <td>0</td>\n",
166 " <td>1</td>\n",
167 " <td>0</td>\n",
168 " <td>0</td>\n",
169 " <td>1</td>\n",
170 " </tr>\n",
171 " <tr>\n",
172 " <th>2</th>\n",
173 " <td>20190322</td>\n",
174 " <td>20190322</td>\n",
175 " <td>KDT</td>\n",
176 " <td>KDT</td>\n",
177 " <td>?</td>\n",
178 " <td>03AA2P</td>\n",
179 " <td>KS</td>\n",
180 " <td>Y011161</td>\n",
181 " <td>QC</td>\n",
182 " <td>MISTER</td>\n",
183 " <td>...</td>\n",
184 " <td>1</td>\n",
185 " <td>Y</td>\n",
186 " <td>0</td>\n",
187 " <td>0</td>\n",
188 " <td>0</td>\n",
189 " <td>0</td>\n",
190 " <td>0</td>\n",
191 " <td>0</td>\n",
192 " <td>0</td>\n",
193 " <td>1</td>\n",
194 " </tr>\n",
195 " <tr>\n",
196 " <th>3</th>\n",
197 " <td>20180515</td>\n",
198 " <td>20180515</td>\n",
199 " <td>KDT</td>\n",
200 " <td>KDT</td>\n",
201 " <td>?</td>\n",
202 " <td>0353D0</td>\n",
203 " <td>MK</td>\n",
204 " <td>?</td>\n",
205 " <td>TS</td>\n",
206 " <td>MISTER</td>\n",
207 " <td>...</td>\n",
208 " <td>1</td>\n",
209 " <td>Y</td>\n",
210 " <td>1</td>\n",
211 " <td>0</td>\n",
212 " <td>0</td>\n",
213 " <td>0</td>\n",
214 " <td>1</td>\n",
215 " <td>0</td>\n",
216 " <td>0</td>\n",
217 " <td>2</td>\n",
218 " </tr>\n",
219 " <tr>\n",
220 " <th>4</th>\n",
221 " <td>20190420</td>\n",
222 " <td>20190420</td>\n",
223 " <td>KDT</td>\n",
224 " <td>KDT</td>\n",
225 " <td>?</td>\n",
226 " <td>03F902</td>\n",
227 " <td>KS</td>\n",
228 " <td>Y013437</td>\n",
229 " <td>QC</td>\n",
230 " <td>MISTER</td>\n",
231 " <td>...</td>\n",
232 " <td>1</td>\n",
233 " <td>Y</td>\n",
234 " <td>0</td>\n",
235 " <td>0</td>\n",
236 " <td>0</td>\n",
237 " <td>1</td>\n",
238 " <td>0</td>\n",
239 " <td>0</td>\n",
240 " <td>0</td>\n",
241 " <td>1</td>\n",
242 " </tr>\n",
243 " </tbody>\n",
244 "</table>\n",
245 "<p>5 rows × 23 columns</p>\n",
246 "</div>"
247 ],
248 "text/plain": [
249 " Departure_YMD_LMT Operation_YMD_LMT Departure_Airport Operation_Airport \\\n",
250 "0 20190326 20190326 KDT KDT \n",
251 "1 20180321 20180321 KDT KDT \n",
252 "2 20190322 20190322 KDT KDT \n",
253 "3 20180515 20180515 KDT KDT \n",
254 "4 20190420 20190420 KDT KDT \n",
255 "\n",
256 " Terminal_Number Terminal_Name Operation_Initials Operation_Sonic_Code \\\n",
257 "0 ? 032363 KS ? \n",
258 "1 ? 03F960 KS ? \n",
259 "2 ? 03AA2P KS Y011161 \n",
260 "3 ? 0353D0 MK ? \n",
261 "4 ? 03F902 KS Y013437 \n",
262 "\n",
263 " Operation_Channel Passenger_Title ... SWC_FLY Cabin_Class \\\n",
264 "0 QC MISTER ... 1 Y \n",
265 "1 QC MISTER ... 1 Y \n",
266 "2 QC MISTER ... 1 Y \n",
267 "3 TS MISTER ... 1 Y \n",
268 "4 QC MISTER ... 1 Y \n",
269 "\n",
270 " SWC_FQTV_Member Passenger_Baggage_Count Passenger_Baggage_Weight \\\n",
271 "0 1 0 0 \n",
272 "1 1 0 0 \n",
273 "2 0 0 0 \n",
274 "3 1 0 0 \n",
275 "4 0 0 0 \n",
276 "\n",
277 " SWC_Staff SWC_CIP_Passenger SWC_VIP_Passenger SWC_Has_Infant \\\n",
278 "0 0 1 0 0 \n",
279 "1 0 1 0 0 \n",
280 "2 0 0 0 0 \n",
281 "3 0 1 0 0 \n",
282 "4 1 0 0 0 \n",
283 "\n",
284 " Operation_Count \n",
285 "0 1 \n",
286 "1 1 \n",
287 "2 1 \n",
288 "3 2 \n",
289 "4 1 \n",
290 "\n",
291 "[5 rows x 23 columns]"
292 ]
293 },
294 "execution_count": 3,
295 "metadata": {},
296 "output_type": "execute_result"
297 }
298 ],
299 "source": [
300 "df.head()"
301 ]
302 },
303 {
304 "cell_type": "code",
305 "execution_count": 4,
306 "metadata": {},
307 "outputs": [
308 {
309 "data": {
310 "text/plain": [
311 "(808696, 23)"
312 ]
313 },
314 "execution_count": 4,
315 "metadata": {},
316 "output_type": "execute_result"
317 }
318 ],
319 "source": [
320 "df.shape"
321 ]
322 },
323 {
324 "cell_type": "markdown",
325 "metadata": {},
326 "source": [
327 "The training data provided consists of 808696 rows and 23 columns. Among those 23 columns, 22 are features and 1 - Operation_Count - is the target label.\n",
328 "\n",
329 "Data types of different features are listed below."
330 ]
331 },
332 {
333 "cell_type": "code",
334 "execution_count": 5,
335 "metadata": {},
336 "outputs": [
337 {
338 "data": {
339 "text/plain": [
340 "Departure_YMD_LMT int64\n",
341 "Operation_YMD_LMT int64\n",
342 "Departure_Airport object\n",
343 "Operation_Airport object\n",
344 "Terminal_Number object\n",
345 "Terminal_Name object\n",
346 "Operation_Initials object\n",
347 "Operation_Sonic_Code object\n",
348 "Operation_Channel object\n",
349 "Passenger_Title object\n",
350 "Passenger_Gender object\n",
351 "Inbound_Departure_Airport object\n",
352 "Outbound_Arrival_Airport object\n",
353 "SWC_FLY int64\n",
354 "Cabin_Class object\n",
355 "SWC_FQTV_Member int64\n",
356 "Passenger_Baggage_Count int64\n",
357 "Passenger_Baggage_Weight int64\n",
358 "SWC_Staff int64\n",
359 "SWC_CIP_Passenger int64\n",
360 "SWC_VIP_Passenger int64\n",
361 "SWC_Has_Infant int64\n",
362 "Operation_Count int64\n",
363 "dtype: object"
364 ]
365 },
366 "execution_count": 5,
367 "metadata": {},
368 "output_type": "execute_result"
369 }
370 ],
371 "source": [
372 "df.dtypes"
373 ]
374 },
375 {
376 "cell_type": "markdown",
377 "metadata": {},
378 "source": [
379 "<h2>Data Preprocessing</h2>"
380 ]
381 },
382 {
383 "cell_type": "markdown",
384 "metadata": {},
385 "source": [
386 "<h4>Helper Functions</h4>\n",
387 "\n",
388 "Functions below are used to generate new features from existing ones. You can also check their descriptions from the following list:\n",
389 "\n",
390 "- is_op_airport_different : Returns 0 if check-in operation is done in KDT airport, returns 1 if operation is performed in another airport\n",
391 "- map_operation_channels : Maps operation channels to their channel groups\n",
392 "- map_inbound_flights : Returns 1 if an inbound flight exists for the entry, returns 0 otherwise\n",
393 "- map_outbound_flights : Returns 1 if an outbound flight exists for the entry, returns 0 otherwise\n",
394 "- is_inbound_kdt : Returns 1 if inbound flight location is KDT, returns 0 otherwise\n",
395 "- is_outbound_kdt : Returns 1 if outbound flight location is KDT, returns 0 otherwise"
396 ]
397 },
398 {
399 "cell_type": "code",
400 "execution_count": 6,
401 "metadata": {},
402 "outputs": [],
403 "source": [
404 "def map_operation_channels(channel):\n",
405 " if channel == 'JW' or channel == 'TW':\n",
406 " return 'online'\n",
407 " elif channel == 'TS' or channel == 'JM':\n",
408 " return 'mobile'\n",
409 " elif channel == 'TY' or channel == 'QC':\n",
410 " return 'kontuar'\n",
411 " elif channel == 'SC':\n",
412 " return 'kiosk'\n",
413 " else:\n",
414 " return 'other'\n",
415 "\n",
416 "## This function is not being used becasue \n",
417 "## there is no KG code in dataframe for KGA personnel\n",
418 "## It seems there is a mistake in the pdf file sent to us.\n",
419 "def map_operation_personnel(initial):\n",
420 " if initial == 'KG':\n",
421 " # Returns 1 if operation is performed by\n",
422 " # KGA personnel\n",
423 " return 1\n",
424 " else:\n",
425 " return 0\n",
426 "\n",
427 "def map_inbound_flights(flight):\n",
428 " if flight == '?':\n",
429 " return 0\n",
430 " else:\n",
431 " return 1\n",
432 "\n",
433 "def map_outbound_flights(flight):\n",
434 " if flight == '?':\n",
435 " return 0\n",
436 " else:\n",
437 " return 1\n",
438 "\n",
439 "def is_inbound_kdt(flight):\n",
440 " if flight == 'KDT':\n",
441 " return 1\n",
442 " else:\n",
443 " return 0\n",
444 "\n",
445 "def is_outbound_kdt(flight):\n",
446 " if flight == 'KDT':\n",
447 " return 1\n",
448 " else:\n",
449 " return 0\n",
450 " \n",
451 "def is_op_airport_different(port):\n",
452 " if port != 'KDT':\n",
453 " return 1\n",
454 " else:\n",
455 " return 0"
456 ]
457 },
458 {
459 "cell_type": "markdown",
460 "metadata": {},
461 "source": [
462 "Applying functions defined above to existing features to create binary features."
463 ]
464 },
465 {
466 "cell_type": "code",
467 "execution_count": 7,
468 "metadata": {},
469 "outputs": [],
470 "source": [
471 "#df['Performed_By_KG'] = df['Operation_Initials'].apply(map_operation_personnel)\n",
472 "df['Inbound_Exist'] = df['Inbound_Departure_Airport'].apply(map_inbound_flights)\n",
473 "df['Outbound_Exist'] = df['Outbound_Arrival_Airport'].apply(map_outbound_flights)\n",
474 "df['Is_Inbound_KDT'] = df['Inbound_Departure_Airport'].apply(is_inbound_kdt)\n",
475 "df['Is_Outbound_KDT'] = df['Outbound_Arrival_Airport'].apply(is_outbound_kdt)\n",
476 "df['Different_Operation_Airport'] = df['Operation_Airport'].apply(is_op_airport_different)\n",
477 "df['Mapped_Channels'] = df['Operation_Channel'].apply(map_operation_channels)"
478 ]
479 },
480 {
481 "cell_type": "markdown",
482 "metadata": {},
483 "source": [
484 "Below, cabin class information is mapped to 1s and 0s. Also, for passenger gender and operation channel features, new features are created with one-hot encoding."
485 ]
486 },
487 {
488 "cell_type": "code",
489 "execution_count": 8,
490 "metadata": {},
491 "outputs": [],
492 "source": [
493 "df['Cabin_Class'] = df['Cabin_Class'].map({'Y': 0, 'C': 1})\n",
494 "\n",
495 "onehot_gender = pd.get_dummies(df['Passenger_Gender'])\n",
496 "df = pd.concat([df, onehot_gender], axis=1)\n",
497 "\n",
498 "onehot_channels = pd.get_dummies(df['Mapped_Channels'])\n",
499 "df = pd.concat([df, onehot_channels], axis=1)"
500 ]
501 },
502 {
503 "cell_type": "markdown",
504 "metadata": {},
505 "source": [
506 "Time difference between flight dates of passengers and their check-in date is stored in a feature called 'checkin_fly_difference'."
507 ]
508 },
509 {
510 "cell_type": "code",
511 "execution_count": 9,
512 "metadata": {},
513 "outputs": [
514 {
515 "data": {
516 "text/plain": [
517 "<matplotlib.axes._subplots.AxesSubplot at 0x18718f6e710>"
518 ]
519 },
520 "execution_count": 9,
521 "metadata": {},
522 "output_type": "execute_result"
523 },
524 {
525 "data": {
526 "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAEECAYAAAAIzd6zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADpNJREFUeJzt3X2MZXddx/H3p7vLgy3Ilp1CZXe7SAgI9EkGGkGqFCUVEYy2pESQKmb/MCAa0WBEDZL6gAQwQZFNrUXFNrZSQwq0NF1KQUvtbJ9ou0Vq2YalYqeUCsVA2fL1jzlbbqd3Zs7s3jOzv9n3K7nZe+/87rnfO9289/TMuXdSVUiS2nHEag8gSVoewy1JjTHcktQYwy1JjTHcktQYwy1JjRks3EnOS3JPklt6rD01yfVJ9iU5Y+T+lya5ceTy7SQ/P9TMktSCDHUed5JTgQeAv6+q5y2xdhvwROCtwEer6uIxa44G7gA2V9X/TXxgSWrEYHvcVXU1cN/ofUmekeSyJLuSfCbJs7u1e6rqZuB7i2zyDOATRlvS4W6lj3HvAN5cVc9nbu/6r5fx2LOACwaZSpIasn6lnijJUcCLgIuS7L/7sT0feyxwPHD5MNNJUjtWLNzM7d3fX1UnHcBjXwNcUlXfnfBMktScFTtUUlXfAL6U5EyAzDmx58Nfi4dJJAkY9qySC4CfBDYB/wP8EbAT+ABwLLABuLCq/jjJC4BLgI3At4GvVtVzu+1sA/4N2FJVi/3wUpIOC4OFW5I0DN85KUmNGeSHk5s2bapt27YNsWlJWpN27dp1b1VN9Vk7SLi3bdvGzMzMEJuWpDUpyV1913qoRJIaY7glqTGGW5IaY7glqTGGW5Ias5KfVSKtmpEPNnuYbz5Tq9zj1po3LtqL3S8d6gy3JDXGcEtSYwy3Divvfe97V3sE6aAZbh1WvvKVr6z2CNJBM9w6rLz73e9e7RGkg2a4JakxhluSGmO4teYt9EYb34CjVhlurXkbNmxY1v3Soc5wa83bt2/fsu6XDnWGW5IaY7h1WNm0adNqjyAdtF7hTvKkJBcnuT3J7iQ/NvRg0qRt3LiRnTt3snHjxtUeRToofT/W9S+By6rqjCSPAX5gwJmkQXz961/nhBNOWO0xpIO2ZLiTPBE4FTgboKoeBB4cdixJ0kL6HCr5YWAW+LskNyQ5N8mR8xcl2Z5kJsnM7OzsxAeVJM3pE+71wI8CH6iqk4FvAW+bv6iqdlTVdFVNT01NTXhM6cD5BhytNX3CvRfYW1XXdrcvZi7kUjOq6lEXqVVLhruqvgp8OcmzurteBtw26FSSpAX1PavkzcCHuzNK7gR+ZbiRJEmL6RXuqroRmB54FklSD75zUpIaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5Ia0+u3vCfZA3wTeAjYV1X+xndJWiW9wt15aVXdO9gkkqRePFQiSY3pG+4CPplkV5LtQw4kSVpc30MlL66qu5McA1yR5Paqunp0QRf07QBbt26d8JiSpP167XFX1d3dn/cAlwAvHLNmR1VNV9X01NTUZKeUJD1syXAnOTLJE/ZfB14O3DL0YJKk8focKnkKcEmS/ev/qaouG3QqSdKClgx3Vd0JnLgCs0iSevB0QElqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMb0DneSdUluSHLpkANJkha3nD3utwC7hxpEktRPr3An2Qz8LHDusONIkpbSd4/7fcDvAt9baEGS7UlmkszMzs5OZDhJ0qMtGe4krwTuqapdi62rqh
1VNV1V01NTUxMbUJL0SH32uF8MvCrJHuBC4LQk/zjoVJKkBS0Z7qr6varaXFXbgLOAnVX1usEnkySN5XncktSY9ctZXFVXAVcNMokkqRf3uCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhpjuCWpMYZbkhqzZLiTPC7JfyS5KcmtSd6xEoNJksZb32PNd4DTquqBJBuAzyb5RFV9buDZJEljLBnuqirgge7mhu5SQw4lSVpYr2PcSdYluRG4B7iiqq4ds2Z7kpkkM7Ozs5OeU5LU6RXuqnqoqk4CNgMvTPK8MWt2VNV0VU1PTU1Nek5JUmdZZ5VU1f3AVcDpg0wjSVpSn7NKppI8qbv+eOCngNuHHkySNF6fs0qOBT6UZB1zof/nqrp02LEkSQvpc1bJzcDJKzCLJKkH3zkpSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY0x3JLUGMMtSY1ZMtxJtiT5VJLdSW5N8paVGEySNN76Hmv2Ab9dVdcneQKwK8kVVXXbwLNJksZYco+7qv67qq7vrn8T2A08bejBJEnjLesYd5JtwMnAtUMMI0laWu9wJzkK+BfgN6vqG2O+vj3JTJKZ2dnZSc4oSRrRK9xJNjAX7Q9X1UfGramqHVU1XVXTU1NTk5xRkjSiz1klAf4W2F1V7xl+JEnSYvrscb8YeD1wWpIbu8srBp5LkrSAJU8HrKrPAlmBWSRJPfjOSUlqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYYbklqjOGWpMYsGe4k5yW5J8ktKzGQJGlxffa4zwdOH3gOSVJPS4a7qq4G7luBWSRJPUzsGHeS7UlmkszMzs5OarOSpHkmFu6q2lFV01U1PTU1NanNSpLm8awSSWqM4ZakxvQ5HfAC4BrgWUn2Jnnj8GNJkhayfqkFVfXalRhEktSPh0okqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5IaY7glqTGGW5Ia0yvcSU5P8oUkdyR529BDSZOW5FEXqVVLhjvJOuCvgJ8BngO8Nslzhh5MmpTRSG/evHns/VJL1vdY80Lgjqq6EyDJhcCrgduGHEyatKp6+LrRVssy+pd57ILkDOD0qvq17vbrgVOq6k3z1m0HtgNs3br1+XfdddcwE2vNOv5Dx6/2CBPz+Td8frVHUGOS7Kqq6T5r++xxj9s1eVTtq2oHsANgenp68X8NpDGGit3+vetxe9xL7bhIh6I+P5zcC2wZub0ZuHuYcaThJGHLli0eJlHz+oT7OuCZSZ6e5DHAWcBHhx1LmpzRveq9e/eOvV9qyZKHSqpqX5I3AZcD64DzqurWwSeTJshIay3pc4ybqvo48PGBZ5Ek9eA7JyWpMYZbkhpjuCWpMYZbkhqz5DsnD2ijySzgWyd1KNoE3LvaQ0hjHFdVU30WDhJu6VCVZKbv24qlQ5WHSiSpMYZbkhpjuHW42bHaA0gHy2PcktQY97glqTGGW5IaY7glqTGGW70kOb/7NXaDbCfJuQfyS6iTTCW5NskNSV6SZE+STZOacXSuJGcm2Z3kU93tC5LcnOS3Dub5pOXq9bGu0tD2/07TA/Ay4PaqegNM/pcAz5vrjcCvV9WnkjwVeFFVHdd3W0nWV9W+iQ6ow5J73BoryS93e5M3JfmH7u5Tk/x7kjtH95qT/E6S67r171
hiG6PP8c5u7/aIJFclme7ufyDJOd3jPpfkKQvMeBLwLuAVSW5M8vh5237LyO1zkvzGAttJkvcnuS3Jx4BjRr52VZLpJH8I/DjwN0n+AvgkcEz3vC9J8owklyXZleQzSZ7dPf78JO/p9tL/PMmRSc7rvl83JHl1t+7sJB/ptvHFJO8ameH0JNd3348ru/vGbkeHiary4uURF+C5wBeATd3to4HzgYuY+8f+OcAd3ddezty50em+dilw6rhtdH+eD5zBXHA/yPdPSb0KmO6uF/Bz3fV3AW9fZNazgfeP3N7D3OeRbAOu7+47Avgv4MkLbOMXgCuY+w1PPwTcD5wxZq7R69uAW0a2cSXwzO76KcDOkdd7KbCuu/0nwOu6608C/hM4snsddwI/CDyOuc/62QJMAV8Gnj7v+zh2O6v9d8fLylw8VKJxTgMurqp7Aarqvu4QxL9W1feA20b2gl/eXW7obh8FPBM4cf42Rrb/B8C1VbV9ged/kLnYAewCfnq5L6Cq9iT5WpKTgacAN1TV1xZYfipwQVU9BNydZOdynivJUcCLgItGDtU8dmTJRd22Ye579aokb+1uPw7Y2l2/sqr+t9vmbcBxwEbg6qr6Uve67ltiO7uXM7vaZLg1Tpjb653vO/PW7P/zT6vqg4/YwNxhiYXe3XUd8PwkR88L+n7frar9j32IA/97ei5ze7JPBc5bYu3BvBPtCOD+qjppga9/a+R6gF+sqi+MLkhyCo/8/u5/3Qv9txi7HR0ePMatca4EXpPkyQBJjl5k7eXAr3Z7nSR5WpJjltjGZcCfAR9L8oQhXkDnEuB04AXdnAu5GjgrybokxwIvXc6TVNU3gC8lORMePmZ+4gLLLwfenG7XvPs/gsVcA/xEkqd36/d/H5e7Ha0h7nHrUarq1iTnAJ9O8hDfPwwybu0nk/wIcE3XkAeYO/Y6bhtnjzzuoi7aH03yioFex4PdDwXvHzlUMc4lzB0e+jxzx4o/fQBP90vAB5K8HdgAXAjcNGbdO4H3ATd30d0DvHKR1zCbZDvwkSRHAPcwd+hoWdvR2uJnlWjN6kJ3PXBmVX1xteeRJsVDJVqTMvemmTuY+4Gf0daa4h63mpDk94Ez5919UVWds4xtHA/MP5/8O1V1ysHOJ60kwy1JjfFQiSQ1xnBLUmMMtyQ1xnBLUmP+Hw+O3JTWz8NVAAAAAElFTkSuQmCC\n",
527 "text/plain": [
528 "<Figure size 432x288 with 1 Axes>"
529 ]
530 },
531 "metadata": {
532 "needs_background": "light"
533 },
534 "output_type": "display_data"
535 }
536 ],
537 "source": [
538 "df['Departure_YMD_LMT'] = pd.to_datetime(df['Departure_YMD_LMT'], format='%Y%m%d')\n",
539 "df['Operation_YMD_LMT'] = pd.to_datetime(df['Operation_YMD_LMT'], format='%Y%m%d')\n",
540 "df['checkin_fly_difference'] = df['Departure_YMD_LMT'] - df['Operation_YMD_LMT']\n",
541 "df['checkin_fly_difference'].plot(kind=\"box\")"
542 ]
543 },
544 {
545 "cell_type": "markdown",
546 "metadata": {},
547 "source": [
548 "Features that do not provide additional information - because they are replaced with new features - are dropped."
549 ]
550 },
551 {
552 "cell_type": "code",
553 "execution_count": 10,
554 "metadata": {},
555 "outputs": [],
556 "source": [
557 "processed_df = df.drop(['Departure_YMD_LMT', 'Operation_YMD_LMT','Departure_Airport', 'Operation_Airport', 'Terminal_Number', 'Terminal_Name',\n",
558 " 'Operation_Initials', 'Operation_Sonic_Code', 'Operation_Channel','Passenger_Title', \n",
559 " 'Passenger_Gender', 'Inbound_Departure_Airport','Outbound_Arrival_Airport'], axis=1)"
560 ]
561 },
562 {
563 "cell_type": "markdown",
564 "metadata": {},
565 "source": [
566 "After dropping the unnecessary columns, the remaining features are the following:"
567 ]
568 },
569 {
570 "cell_type": "code",
571 "execution_count": 11,
572 "metadata": {},
573 "outputs": [
574 {
575 "data": {
576 "text/plain": [
577 "SWC_FLY int64\n",
578 "Cabin_Class float64\n",
579 "SWC_FQTV_Member int64\n",
580 "Passenger_Baggage_Count int64\n",
581 "Passenger_Baggage_Weight int64\n",
582 "SWC_Staff int64\n",
583 "SWC_CIP_Passenger int64\n",
584 "SWC_VIP_Passenger int64\n",
585 "SWC_Has_Infant int64\n",
586 "Operation_Count int64\n",
587 "Inbound_Exist int64\n",
588 "Outbound_Exist int64\n",
589 "Is_Inbound_KDT int64\n",
590 "Is_Outbound_KDT int64\n",
591 "Different_Operation_Airport int64\n",
592 "Mapped_Channels object\n",
593 "? uint8\n",
594 "C uint8\n",
595 "C/INF uint8\n",
596 "F uint8\n",
597 "F/INF uint8\n",
598 "M uint8\n",
599 "M/INF uint8\n",
600 "kiosk uint8\n",
601 "kontuar uint8\n",
602 "mobile uint8\n",
603 "online uint8\n",
604 "other uint8\n",
605 "checkin_fly_difference timedelta64[ns]\n",
606 "dtype: object"
607 ]
608 },
609 "execution_count": 11,
610 "metadata": {},
611 "output_type": "execute_result"
612 }
613 ],
614 "source": [
615 "processed_df.dtypes"
616 ]
617 },
618 {
619 "cell_type": "markdown",
620 "metadata": {},
621 "source": [
622 "Checking whether the features include null values:"
623 ]
624 },
625 {
626 "cell_type": "code",
627 "execution_count": 12,
628 "metadata": {},
629 "outputs": [
630 {
631 "data": {
632 "text/plain": [
633 "SWC_FLY 0\n",
634 "Cabin_Class 27571\n",
635 "SWC_FQTV_Member 0\n",
636 "Passenger_Baggage_Count 0\n",
637 "Passenger_Baggage_Weight 0\n",
638 "SWC_Staff 0\n",
639 "SWC_CIP_Passenger 0\n",
640 "SWC_VIP_Passenger 0\n",
641 "SWC_Has_Infant 0\n",
642 "Operation_Count 0\n",
643 "Inbound_Exist 0\n",
644 "Outbound_Exist 0\n",
645 "Is_Inbound_KDT 0\n",
646 "Is_Outbound_KDT 0\n",
647 "Different_Operation_Airport 0\n",
648 "Mapped_Channels 0\n",
649 "? 0\n",
650 "C 0\n",
651 "C/INF 0\n",
652 "F 0\n",
653 "F/INF 0\n",
654 "M 0\n",
655 "M/INF 0\n",
656 "kiosk 0\n",
657 "kontuar 0\n",
658 "mobile 0\n",
659 "online 0\n",
660 "other 0\n",
661 "checkin_fly_difference 0\n",
662 "dtype: int64"
663 ]
664 },
665 "execution_count": 12,
666 "metadata": {},
667 "output_type": "execute_result"
668 }
669 ],
670 "source": [
671 "processed_df.isna().sum()"
672 ]
673 },
674 {
675 "cell_type": "markdown",
676 "metadata": {},
677 "source": [
678 "Only the Cabin_Class feature includes null values. Since we do not want to fill this feature with arbitrary values, we drop the entries whose cabin information is NaN."
679 ]
680 },
681 {
682 "cell_type": "code",
683 "execution_count": 13,
684 "metadata": {},
685 "outputs": [],
686 "source": [
687 "processed_df = processed_df[pd.notnull(processed_df['Cabin_Class'])]"
688 ]
689 },
690 {
691 "cell_type": "code",
692 "execution_count": 14,
693 "metadata": {},
694 "outputs": [],
695 "source": [
696 "processed_df = processed_df.drop(['Mapped_Channels'], axis = 1)\n",
697 "# Converting timedelta typed feature to int\n",
698 "processed_df['checkin_fly_difference'] = processed_df['checkin_fly_difference'].dt.days"
699 ]
700 },
701 {
702 "cell_type": "markdown",
703 "metadata": {},
704 "source": [
705 "<h2>Applying Machine Learning Models on Validation Set</h2>"
706 ]
707 },
708 {
709 "cell_type": "markdown",
710 "metadata": {},
711 "source": [
712 "<h4><b>XGBoost Regressor Model</b></h4>"
713 ]
714 },
715 {
716 "cell_type": "code",
717 "execution_count": 15,
718 "metadata": {},
719 "outputs": [],
720 "source": [
721 "y = processed_df['Operation_Count']\n",
722 "X = processed_df.drop(['Operation_Count'], axis = 1)\n",
723 "\n",
724 "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33, random_state=42)"
725 ]
726 },
727 {
728 "cell_type": "code",
729 "execution_count": 16,
730 "metadata": {},
731 "outputs": [
732 {
733 "name": "stdout",
734 "output_type": "stream",
735 "text": [
736 "[19:36:06] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
737 ]
738 },
739 {
740 "data": {
741 "text/plain": [
742 "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
743 " colsample_bynode=1, colsample_bytree=1, gamma=0,\n",
744 " importance_type='gain', learning_rate=0.1, max_delta_step=0,\n",
745 " max_depth=3, min_child_weight=1, missing=None, n_estimators=100,\n",
746 " n_jobs=1, nthread=None, objective='reg:linear', random_state=0,\n",
747 " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
748 " silent=None, subsample=1, verbosity=1)"
749 ]
750 },
751 "execution_count": 16,
752 "metadata": {},
753 "output_type": "execute_result"
754 }
755 ],
756 "source": [
757 "xgbmodel = xgb.XGBRegressor()\n",
758 "xgbmodel.fit(X_train, y_train)"
759 ]
760 },
761 {
762 "cell_type": "code",
763 "execution_count": 17,
764 "metadata": {},
765 "outputs": [
766 {
767 "data": {
768 "text/plain": [
769 "4.225014413957816"
770 ]
771 },
772 "execution_count": 17,
773 "metadata": {},
774 "output_type": "execute_result"
775 }
776 ],
777 "source": [
778 "pred = xgbmodel.predict(X_test)\n",
779 "mean_squared_error(y_test, pred)"
780 ]
781 },
782 {
783 "cell_type": "code",
784 "execution_count": 18,
785 "metadata": {},
786 "outputs": [
787 {
788 "data": {
789 "text/plain": [
790 "0.17922021632556528"
791 ]
792 },
793 "execution_count": 18,
794 "metadata": {},
795 "output_type": "execute_result"
796 }
797 ],
798 "source": [
799 "r2_score(y_test, pred)"
800 ]
801 },
802 {
803 "cell_type": "code",
804 "execution_count": 19,
805 "metadata": {},
806 "outputs": [
807 {
808 "data": {
809 "image/png": "\n",
810 "text/plain": [
811 "<Figure size 864x864 with 1 Axes>"
812 ]
813 },
814 "metadata": {
815 "needs_background": "light"
816 },
817 "output_type": "display_data"
818 }
819 ],
820 "source": [
821 "feature_importances = pd.Series(xgbmodel.feature_importances_, index=X.columns).sort_values(ascending=False)\n",
822 "\n",
823 "plt.figure(figsize=(12, 12))\n",
824 "sns.barplot(x=feature_importances, y=feature_importances.index)\n",
825 "\n",
826 "# Add labels to our graph \n",
827 "plt.xlabel('Feature Importance Score')\n",
828 "plt.ylabel('Features')\n",
829 "plt.title(\"Feature Importance Rankings\")\n",
830 "plt.show()"
831 ]
832 },
833 {
834 "cell_type": "markdown",
835 "metadata": {},
836 "source": [
837 "<h4>K-Fold Cross Validation</h4>\n",
838 "\n",
839 "10-fold cross validation for XGBoost model is below."
840 ]
841 },
842 {
843 "cell_type": "code",
844 "execution_count": 20,
845 "metadata": {},
846 "outputs": [
847 {
848 "name": "stdout",
849 "output_type": "stream",
850 "text": [
851 "[19:36:58] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
852 "[19:37:57] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
853 "[19:38:55] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
854 "[19:39:54] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
855 "[19:40:53] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
856 "[19:41:51] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
857 "[19:42:51] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
858 "[19:43:49] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
859 "[19:44:48] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n",
860 "[19:45:47] WARNING: C:/Jenkins/workspace/xgboost-win64_release_0.90/src/objective/regression_obj.cu:152: reg:linear is now deprecated in favor of reg:squarederror.\n"
861 ]
862 },
863 {
864 "data": {
865 "text/plain": [
866 "array([0.18317994, 0.18209709, 0.17805348, 0.17923736, 0.17389156,\n",
867 " 0.18128156, 0.16611453, 0.18291924, 0.17375828, 0.1774409 ])"
868 ]
869 },
870 "execution_count": 20,
871 "metadata": {},
872 "output_type": "execute_result"
873 }
874 ],
875 "source": [
876 "from sklearn.model_selection import KFold\n",
877 "from sklearn.model_selection import cross_val_score\n",
878 "\n",
879 "kfold = KFold(n_splits = 10, random_state=7)\n",
880 "xgbresults = cross_val_score(xgbmodel, X, y, cv=kfold)\n",
881 "xgbresults"
882 ]
883 },
884 {
885 "cell_type": "markdown",
886 "metadata": {},
887 "source": [
888 "As seen from the cross-validation scores, the R^2 score of XGBoost does not change much. The scores of all folds are very close to the R^2 score of the initial fit of the model."
889 ]
890 },
891 {
892 "cell_type": "markdown",
893 "metadata": {},
894 "source": [
895 "<h4><b>Random Forest Regressor Model</b></h4>"
896 ]
897 },
898 {
899 "cell_type": "code",
900 "execution_count": 21,
901 "metadata": {},
902 "outputs": [
903 {
904 "data": {
905 "text/plain": [
906 "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
907 " max_features='auto', max_leaf_nodes=None,\n",
908 " min_impurity_decrease=0.0, min_impurity_split=None,\n",
909 " min_samples_leaf=1, min_samples_split=2,\n",
910 " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n",
911 " oob_score=False, random_state=None, verbose=0, warm_start=False)"
912 ]
913 },
914 "execution_count": 21,
915 "metadata": {},
916 "output_type": "execute_result"
917 }
918 ],
919 "source": [
920 "rfmodel = RandomForestRegressor(criterion='mse')\n",
921 "rfmodel.fit(X_train, y_train)"
922 ]
923 },
924 {
925 "cell_type": "code",
926 "execution_count": 22,
927 "metadata": {},
928 "outputs": [
929 {
930 "data": {
931 "text/plain": [
932 "3.7485005941444633"
933 ]
934 },
935 "execution_count": 22,
936 "metadata": {},
937 "output_type": "execute_result"
938 }
939 ],
940 "source": [
941 "rf_pred = rfmodel.predict(X_test)\n",
942 "mean_squared_error(y_test, rf_pred)"
943 ]
944 },
945 {
946 "cell_type": "code",
947 "execution_count": 23,
948 "metadata": {},
949 "outputs": [
950 {
951 "data": {
952 "text/plain": [
953 "0.27179100345760343"
954 ]
955 },
956 "execution_count": 23,
957 "metadata": {},
958 "output_type": "execute_result"
959 }
960 ],
961 "source": [
962 "r2_score(y_test, rf_pred)"
963 ]
964 },
965 {
966 "cell_type": "code",
967 "execution_count": 24,
968 "metadata": {},
969 "outputs": [
970 {
971 "data": {
972 "image/png": "\n",
973 "text/plain": [
974 "<Figure size 864x864 with 1 Axes>"
975 ]
976 },
977 "metadata": {
978 "needs_background": "light"
979 },
980 "output_type": "display_data"
981 }
982 ],
983 "source": [
984 "feature_importances = pd.Series(rfmodel.feature_importances_, index=X.columns).sort_values(ascending=False)\n",
985 "\n",
986 "plt.figure(figsize=(12, 12))\n",
987 "sns.barplot(x=feature_importances, y=feature_importances.index)\n",
988 "\n",
989 "# Add labels to our graph \n",
990 "plt.xlabel('Feature Importance Score')\n",
991 "plt.ylabel('Features')\n",
992 "plt.title(\"Feature Importance Rankings\")\n",
993 "plt.show()"
994 ]
995 },
996 {
997 "cell_type": "markdown",
998 "metadata": {},
999 "source": [
1000 "<h4>K-Fold Cross Validation</h4>\n",
1001 "\n",
1002 "10-fold cross validation for Random Forest Regressor model is below."
1003 ]
1004 },
1005 {
1006 "cell_type": "code",
1007 "execution_count": 25,
1008 "metadata": {},
1009 "outputs": [
1010 {
1011 "data": {
1012 "text/plain": [
1013 "array([0.28349469, 0.2705083 , 0.26981818, 0.26610356, 0.25349095,\n",
1014 " 0.27755084, 0.24296661, 0.26295131, 0.27446709, 0.25949676])"
1015 ]
1016 },
1017 "execution_count": 25,
1018 "metadata": {},
1019 "output_type": "execute_result"
1020 }
1021 ],
1022 "source": [
1023 "kfold = KFold(n_splits = 10, random_state=7)\n",
1024 "rf_results = cross_val_score(rfmodel, X, y, cv=kfold)\n",
1025 "rf_results"
1026 ]
1027 },
1028 {
1029 "cell_type": "markdown",
1030 "metadata": {},
1031 "source": [
1032 "Similar to the cross-validation results of XGBoost, the R^2 scores of the Random Forest Regressor do not change significantly across folds. For both models, we can therefore treat the initial scores as representative of the models."
1033 ]
1034 },
1035 {
1036 "cell_type": "markdown",
1037 "metadata": {},
1038 "source": [
1039 "<h2>Insights From Machine Learning Models</h2>"
1040 ]
1041 },
1042 {
1043 "cell_type": "markdown",
1044 "metadata": {},
1045 "source": [
1046 "1. For both models, it can be seen that the most important features are very similar. The most important features are Passenger_Baggage_Count, Inbound_Exist, Outbound_Exist, kontuar and SWC_CIP_Passenger. Among those features, Inbound_Exist and Outbound_Exist were created by us. Their importance for the regression can be interpreted as the presence or absence of inbound and outbound flights affecting the number of check-in operations made. Also, from the importance graphs above, it can be seen that performing check-in from kontuar also affects the number of check-in operations attempted.\n",
1047 "\n",
1048 "2. When the two models are compared, the Random Forest Regressor seems to perform better at predicting operation counts in the validation set. This may be due to the XGBoost model overfitting to the training data after boosting iterations. Parameters of both models can be tuned to further analyze their performance.\n",
1049 "\n",
1050 "3. With current parameters, for predicting operation counts in test file, Random Forest Regressor should be used, since its evaluation metrics are better."
1051 ]
1052 },
1053 {
1054 "cell_type": "markdown",
1055 "metadata": {},
1056 "source": [
1057 "<h4>Conducting Hypothesis Testing from Insights</h4>"
1058 ]
1059 },
1060 {
1061 "cell_type": "markdown",
1062 "metadata": {},
1063 "source": [
1064 "<b>1.</b> For the first hypothesis test, we will analyze whether performing the check-in operation from kontuar causes a statistically significant difference in the number of operations made compared to other check-in channels. "
1065 ]
1066 },
1067 {
1068 "cell_type": "code",
1069 "execution_count": 26,
1070 "metadata": {},
1071 "outputs": [],
1072 "source": [
1073 "kontuar_ops = processed_df[processed_df['kontuar'] == 1]['Operation_Count']\n",
1074 "other_ops = processed_df[processed_df['kontuar'] == 0]['Operation_Count']"
1075 ]
1076 },
1077 {
1078 "cell_type": "code",
1079 "execution_count": 27,
1080 "metadata": {},
1081 "outputs": [
1082 {
1083 "name": "stdout",
1084 "output_type": "stream",
1085 "text": [
1086 "Mean value of operation count when check-in is performed in kontuar: 1.31\n"
1087 ]
1088 }
1089 ],
1090 "source": [
1091 "print(\"Mean value of operation count when check-in is performed in kontuar: {:.2f}\".format(kontuar_ops.mean()))"
1092 ]
1093 },
1094 {
1095 "cell_type": "code",
1096 "execution_count": 28,
1097 "metadata": {},
1098 "outputs": [
1099 {
1100 "name": "stdout",
1101 "output_type": "stream",
1102 "text": [
1103 "Mean value of operation count when check-in is NOT performed in kontuar: 1.76\n"
1104 ]
1105 }
1106 ],
1107 "source": [
1108 "print(\"Mean value of operation count when check-in is NOT performed in kontuar: {:.2f}\".format(other_ops.mean()))"
1109 ]
1110 },
1111 {
1112 "cell_type": "code",
1113 "execution_count": 38,
1114 "metadata": {},
1115 "outputs": [
1116 {
1117 "data": {
1118 "image/png": "\n",
1119 "text/plain": [
1120 "<Figure size 1008x432 with 3 Axes>"
1121 ]
1122 },
1123 "metadata": {
1124 "needs_background": "light"
1125 },
1126 "output_type": "display_data"
1127 }
1128 ],
1129 "source": [
1130 "fig, ax = plt.subplots(1, 3, figsize=(14,6)) # a figure with 1 row and 3 columns\n",
1131 " # ax variable stores a list with 3 elements\n",
1132 " # each element in ax corresponds to a chart\n",
1133 " \n",
1134 "kontuar_ops.plot(kind=\"hist\", ax=ax[0], bins=40, label=\"k\", color=\"c\")\n",
1135 "ax[0].set_title(\"From Kontuar\")\n",
1136 "\n",
1137 "other_ops.plot(kind=\"hist\", ax=ax[1], bins=40, label=\"o\", color=\"m\")\n",
1138 "ax[1].set_title(\"From Other Channels\")\n",
1139 "\n",
1140 "sns.kdeplot(kontuar_ops, shade=True, label=\"kontuar operations\", ax=ax[2], color=\"c\")\n",
1141 "sns.kdeplot(other_ops, shade=True, label=\"non-kontuar operations\", ax=ax[2], color=\"m\")\n",
1142 "ax[2].set_title(\"Comparison with KDE\")\n",
1143 "\n",
1144 "plt.suptitle(\"Operation Count Distributions\")\n",
1145 "plt.show()"
1146 ]
1147 },
1148 {
1149 "cell_type": "code",
1150 "execution_count": 34,
1151 "metadata": {},
1152 "outputs": [
1153 {
1154 "data": {
1155 "text/plain": [
1156 "Ttest_indResult(statistic=-88.80864853163777, pvalue=0.0)"
1157 ]
1158 },
1159 "execution_count": 34,
1160 "metadata": {},
1161 "output_type": "execute_result"
1162 }
1163 ],
1164 "source": [
1165 "# Two-sided Welch's t-test (equal_var=False) between the 2 distributions\n",
1166 "stats.ttest_ind(kontuar_ops, other_ops, equal_var=False)"
1167 ]
1168 },
1169 {
1170 "cell_type": "markdown",
1171 "metadata": {},
1172 "source": [
1173 "The t-test between the 2 distributions gives us a p-value of 0.0. Interpreting this value as below 0.001 and considering that the threshold value for significance is 0.05, this result tells us that the difference between the two distributions is statistically significant. Additionally, after checking the means of the two distributions, we can conclude that check-in at kontuar is likely to be completed in fewer operations than check-in through other channels. "
1174 ]
1175 },
1176 {
1177 "cell_type": "markdown",
1178 "metadata": {},
1179 "source": [
1180 "<b>2.</b> For the second hypothesis test, we will compare the two distributions of operation counts for the different values of the Inbound_Exist feature. By doing that, we will test whether the existence/absence of an inbound flight actually affects the number of operations performed for check-ins of passengers."
1181 ]
1182 },
1183 {
1184 "cell_type": "code",
1185 "execution_count": 35,
1186 "metadata": {},
1187 "outputs": [],
1188 "source": [
1189 "inbound_present = processed_df[processed_df['Inbound_Exist'] == 1]['Operation_Count']\n",
1190 "inbound_absent = processed_df[processed_df['Inbound_Exist'] == 0]['Operation_Count']"
1191 ]
1192 },
1193 {
1194 "cell_type": "code",
1195 "execution_count": 36,
1196 "metadata": {},
1197 "outputs": [
1198 {
1199 "name": "stdout",
1200 "output_type": "stream",
1201 "text": [
1202 "Mean value of operation count when there is an inbound flight: 1.24\n"
1203 ]
1204 }
1205 ],
1206 "source": [
1207 "print(\"Mean value of operation count when there is an inbound flight: {:.2f}\".format(inbound_present.mean()))"
1208 ]
1209 },
1210 {
1211 "cell_type": "code",
1212 "execution_count": 37,
1213 "metadata": {},
1214 "outputs": [
1215 {
1216 "name": "stdout",
1217 "output_type": "stream",
1218 "text": [
1219 "Mean value of operation count when there is not an inbound flight: 1.57\n"
1220 ]
1221 }
1222 ],
1223 "source": [
1224 "print(\"Mean value of operation count when there is not an inbound flight: {:.2f}\".format(inbound_absent.mean()))"
1225 ]
1226 },
1227 {
1228 "cell_type": "code",
1229 "execution_count": 39,
1230 "metadata": {},
1231 "outputs": [
1232 {
1233 "data": {
1234 "image/png": "\n",
1235 "text/plain": [
1236 "<Figure size 1008x432 with 3 Axes>"
1237 ]
1238 },
1239 "metadata": {
1240 "needs_background": "light"
1241 },
1242 "output_type": "display_data"
1243 }
1244 ],
1245 "source": [
1246 "fig, ax = plt.subplots(1, 3, figsize=(14,6)) # a figure with 1 row and 3 columns\n",
1247 " # ax variable stores a list with 3 elements\n",
1248 " # each element in ax corresponds to a chart\n",
1249 " \n",
1250 "inbound_present.plot(kind=\"hist\", ax=ax[0], bins=40, label=\"inb\", color=\"b\")\n",
1251 "ax[0].set_title(\"Inbound Present\")\n",
1252 "\n",
1253 "inbound_absent.plot(kind=\"hist\", ax=ax[1], bins=40, label=\"no\", color=\"r\")\n",
1254 "ax[1].set_title(\"Inbound Absent\")\n",
1255 "\n",
1256 "sns.kdeplot(inbound_present, shade=True, label=\"inbound flights\", ax=ax[2], color=\"b\")\n",
1257 "sns.kdeplot(inbound_absent, shade=True, label=\"no inbound flights\", ax=ax[2], color=\"r\")\n",
1258 "ax[2].set_title(\"Comparison with KDE\")\n",
1259 "\n",
1260 "plt.suptitle(\"Operation Count Distributions\")\n",
1261 "plt.show()"
1262 ]
1263 },
1264 {
1265 "cell_type": "code",
1266 "execution_count": 40,
1267 "metadata": {},
1268 "outputs": [
1269 {
1270 "data": {
1271 "text/plain": [
1272 "Ttest_indResult(statistic=-89.01460439966546, pvalue=0.0)"
1273 ]
1274 },
1275 "execution_count": 40,
1276 "metadata": {},
1277 "output_type": "execute_result"
1278 }
1279 ],
1280 "source": [
1281 "# Two-sided Welch's t-test (equal_var=False) between the 2 distributions\n",
1282 "stats.ttest_ind(inbound_present, inbound_absent, equal_var=False)"
1283 ]
1284 },
1285 {
1286 "cell_type": "markdown",
1287 "metadata": {},
1288 "source": [
1289 "Again, the p-value of the t-test is 0.0. Interpreting this value as below 0.001 and considering that the threshold value for significance is 0.05, this result tells us that the difference between the two distributions is statistically significant. If we combine this result with the observed means of the distributions, we can conclude that the presence of an inbound flight results in fewer check-in operations. However, it should be noted that the sample where an inbound flight is present is much smaller than the sample where it is not."
1290 ]
1291 },
1292 {
1293 "cell_type": "markdown",
1294 "metadata": {},
1295 "source": [
1296 "<h2>Predicting Labels in Test Set</h2>"
1297 ]
1298 },
1299 {
1300 "cell_type": "markdown",
1301 "metadata": {},
1302 "source": [
1303 "As done for the training file, the test file should be preprocessed as well. Below, the same operations performed on the training csv file are also performed on the test data.\n"
1304 ]
1305 },
1306 {
1307 "cell_type": "code",
1308 "execution_count": 47,
1309 "metadata": {},
1310 "outputs": [],
1311 "source": [
1312 "test_df = pd.read_csv(r\"C:\\Users\\Mert\\Desktop\\assessment\\assessment\\Assessment Data\\Assessment Result File.csv\")"
1313 ]
1314 },
1315 {
1316 "cell_type": "code",
1317 "execution_count": 48,
1318 "metadata": {},
1319 "outputs": [
1320 {
1321 "data": {
1322 "text/html": [
1323 "<div>\n",
1324 "<style scoped>\n",
1325 " .dataframe tbody tr th:only-of-type {\n",
1326 " vertical-align: middle;\n",
1327 " }\n",
1328 "\n",
1329 " .dataframe tbody tr th {\n",
1330 " vertical-align: top;\n",
1331 " }\n",
1332 "\n",
1333 " .dataframe thead th {\n",
1334 " text-align: right;\n",
1335 " }\n",
1336 "</style>\n",
1337 "<table border=\"1\" class=\"dataframe\">\n",
1338 " <thead>\n",
1339 " <tr style=\"text-align: right;\">\n",
1340 " <th></th>\n",
1341 " <th>Departure_YMD_LMT</th>\n",
1342 " <th>Operation_YMD_LMT</th>\n",
1343 " <th>Departure_Airport</th>\n",
1344 " <th>Operation_Airport</th>\n",
1345 " <th>Terminal_Number</th>\n",
1346 " <th>Terminal_Name</th>\n",
1347 " <th>Operation_Initials</th>\n",
1348 " <th>Operation_Sonic_Code</th>\n",
1349 " <th>Operation_Channel</th>\n",
1350 " <th>Passenger_Title</th>\n",
1351 " <th>...</th>\n",
1352 " <th>SWC_FLY</th>\n",
1353 " <th>Cabin_Class</th>\n",
1354 " <th>SWC_FQTV_Member</th>\n",
1355 " <th>Passenger_Baggage_Count</th>\n",
1356 " <th>Passenger_Baggage_Weight</th>\n",
1357 " <th>SWC_Staff</th>\n",
1358 " <th>SWC_CIP_Passenger</th>\n",
1359 " <th>SWC_VIP_Passenger</th>\n",
1360 " <th>SWC_Has_Infant</th>\n",
1361 " <th>Operation_Count</th>\n",
1362 " </tr>\n",
1363 " </thead>\n",
1364 " <tbody>\n",
1365 " <tr>\n",
1366 " <th>0</th>\n",
1367 " <td>20190608</td>\n",
1368 " <td>20190607</td>\n",
1369 " <td>KDT</td>\n",
1370 " <td>KDT</td>\n",
1371 " <td>?</td>\n",
1372 " <td>03539F</td>\n",
1373 " <td>MK</td>\n",
1374 " <td>?</td>\n",
1375 " <td>TS</td>\n",
1376 " <td>MISS</td>\n",
1377 " <td>...</td>\n",
1378 " <td>1</td>\n",
1379 " <td>Y</td>\n",
1380 " <td>1</td>\n",
1381 " <td>1</td>\n",
1382 " <td>17</td>\n",
1383 " <td>0</td>\n",
1384 " <td>0</td>\n",
1385 " <td>0</td>\n",
1386 " <td>0</td>\n",
1387 " <td></td>\n",
1388 " </tr>\n",
1389 " <tr>\n",
1390 " <th>1</th>\n",
1391 " <td>20190609</td>\n",
1392 " <td>20190609</td>\n",
1393 " <td>KDT</td>\n",
1394 " <td>KDT</td>\n",
1395 " <td>?</td>\n",
1396 " <td>03F39C</td>\n",
1397 " <td>KS</td>\n",
1398 " <td>Y013171</td>\n",
1399 " <td>QC</td>\n",
1400 " <td>MISTER</td>\n",
1401 " <td>...</td>\n",
1402 " <td>1</td>\n",
1403 " <td>Y</td>\n",
1404 " <td>0</td>\n",
1405 " <td>1</td>\n",
1406 " <td>7</td>\n",
1407 " <td>0</td>\n",
1408 " <td>0</td>\n",
1409 " <td>0</td>\n",
1410 " <td>0</td>\n",
1411 " <td></td>\n",
1412 " </tr>\n",
1413 " <tr>\n",
1414 " <th>2</th>\n",
1415 " <td>20190612</td>\n",
1416 " <td>20190612</td>\n",
1417 " <td>KDT</td>\n",
1418 " <td>KDT</td>\n",
1419 " <td>?</td>\n",
1420 " <td>032A72</td>\n",
1421 " <td>KS</td>\n",
1422 " <td>Y013347</td>\n",
1423 " <td>QC</td>\n",
1424 " <td>MISTER</td>\n",
1425 " <td>...</td>\n",
1426 " <td>1</td>\n",
1427 " <td>Y</td>\n",
1428 " <td>1</td>\n",
1429 " <td>0</td>\n",
1430 " <td>0</td>\n",
1431 " <td>0</td>\n",
1432 " <td>0</td>\n",
1433 " <td>0</td>\n",
1434 " <td>0</td>\n",
1435 " <td></td>\n",
1436 " </tr>\n",
1437 " <tr>\n",
1438 " <th>3</th>\n",
1439 " <td>20190613</td>\n",
1440 " <td>20190612</td>\n",
1441 " <td>KDT</td>\n",
1442 " <td>KDT</td>\n",
1443 " <td>?</td>\n",
1444 " <td>0302F9</td>\n",
1445 " <td>Q7</td>\n",
1446 " <td>?</td>\n",
1447 " <td>TW</td>\n",
1448 " <td>MISTER</td>\n",
1449 " <td>...</td>\n",
1450 " <td>1</td>\n",
1451 " <td>Y</td>\n",
1452 " <td>1</td>\n",
1453 " <td>0</td>\n",
1454 " <td>0</td>\n",
1455 " <td>0</td>\n",
1456 " <td>1</td>\n",
1457 " <td>0</td>\n",
1458 " <td>0</td>\n",
1459 " <td></td>\n",
1460 " </tr>\n",
1461 " <tr>\n",
1462 " <th>4</th>\n",
1463 " <td>20190602</td>\n",
1464 " <td>20190601</td>\n",
1465 " <td>KDT</td>\n",
1466 " <td>KDT</td>\n",
1467 " <td>?</td>\n",
1468 " <td>03023B</td>\n",
1469 " <td>MK</td>\n",
1470 " <td>?</td>\n",
1471 " <td>TW</td>\n",
1472 " <td>MISS</td>\n",
1473 " <td>...</td>\n",
1474 " <td>1</td>\n",
1475 " <td>Y</td>\n",
1476 " <td>0</td>\n",
1477 " <td>1</td>\n",
1478 " <td>18</td>\n",
1479 " <td>0</td>\n",
1480 " <td>0</td>\n",
1481 " <td>0</td>\n",
1482 " <td>0</td>\n",
1483 " <td></td>\n",
1484 " </tr>\n",
1485 " </tbody>\n",
1486 "</table>\n",
1487 "<p>5 rows × 23 columns</p>\n",
1488 "</div>"
1489 ],
1490 "text/plain": [
1491 " Departure_YMD_LMT Operation_YMD_LMT Departure_Airport Operation_Airport \\\n",
1492 "0 20190608 20190607 KDT KDT \n",
1493 "1 20190609 20190609 KDT KDT \n",
1494 "2 20190612 20190612 KDT KDT \n",
1495 "3 20190613 20190612 KDT KDT \n",
1496 "4 20190602 20190601 KDT KDT \n",
1497 "\n",
1498 " Terminal_Number Terminal_Name Operation_Initials Operation_Sonic_Code \\\n",
1499 "0 ? 03539F MK ? \n",
1500 "1 ? 03F39C KS Y013171 \n",
1501 "2 ? 032A72 KS Y013347 \n",
1502 "3 ? 0302F9 Q7 ? \n",
1503 "4 ? 03023B MK ? \n",
1504 "\n",
1505 " Operation_Channel Passenger_Title ... SWC_FLY Cabin_Class \\\n",
1506 "0 TS MISS ... 1 Y \n",
1507 "1 QC MISTER ... 1 Y \n",
1508 "2 QC MISTER ... 1 Y \n",
1509 "3 TW MISTER ... 1 Y \n",
1510 "4 TW MISS ... 1 Y \n",
1511 "\n",
1512 " SWC_FQTV_Member Passenger_Baggage_Count Passenger_Baggage_Weight \\\n",
1513 "0 1 1 17 \n",
1514 "1 0 1 7 \n",
1515 "2 1 0 0 \n",
1516 "3 1 0 0 \n",
1517 "4 0 1 18 \n",
1518 "\n",
1519 " SWC_Staff SWC_CIP_Passenger SWC_VIP_Passenger SWC_Has_Infant \\\n",
1520 "0 0 0 0 0 \n",
1521 "1 0 0 0 0 \n",
1522 "2 0 0 0 0 \n",
1523 "3 0 1 0 0 \n",
1524 "4 0 0 0 0 \n",
1525 "\n",
1526 " Operation_Count \n",
1527 "0 \n",
1528 "1 \n",
1529 "2 \n",
1530 "3 \n",
1531 "4 \n",
1532 "\n",
1533 "[5 rows x 23 columns]"
1534 ]
1535 },
1536 "execution_count": 48,
1537 "metadata": {},
1538 "output_type": "execute_result"
1539 }
1540 ],
1541 "source": [
1542 "test_df.head()"
1543 ]
1544 },
1545 {
1546 "cell_type": "code",
1547 "execution_count": 49,
1548 "metadata": {},
1549 "outputs": [
1550 {
1551 "data": {
1552 "text/plain": [
1553 "Departure_YMD_LMT int64\n",
1554 "Operation_YMD_LMT int64\n",
1555 "Departure_Airport object\n",
1556 "Operation_Airport object\n",
1557 "Terminal_Number object\n",
1558 "Terminal_Name object\n",
1559 "Operation_Initials object\n",
1560 "Operation_Sonic_Code object\n",
1561 "Operation_Channel object\n",
1562 "Passenger_Title object\n",
1563 "Passenger_Gender object\n",
1564 "Inbound_Departure_Airport object\n",
1565 "Outbound_Arrival_Airport object\n",
1566 "SWC_FLY int64\n",
1567 "Cabin_Class object\n",
1568 "SWC_FQTV_Member int64\n",
1569 "Passenger_Baggage_Count int64\n",
1570 "Passenger_Baggage_Weight int64\n",
1571 "SWC_Staff int64\n",
1572 "SWC_CIP_Passenger int64\n",
1573 "SWC_VIP_Passenger int64\n",
1574 "SWC_Has_Infant int64\n",
1575 "Operation_Count object\n",
1576 "dtype: object"
1577 ]
1578 },
1579 "execution_count": 49,
1580 "metadata": {},
1581 "output_type": "execute_result"
1582 }
1583 ],
1584 "source": [
1585 "test_df.dtypes"
1586 ]
1587 },
1588 {
1589 "cell_type": "code",
1590 "execution_count": 50,
1591 "metadata": {},
1592 "outputs": [],
1593 "source": [
1594 "# Performing all preprocessing in this cell\n",
1595 "\n",
1596 "#test_df['Performed_By_KG'] = test_df['Operation_Initials'].apply(map_operation_personnel)\n",
1597 "test_df['Inbound_Exist'] = test_df['Inbound_Departure_Airport'].apply(map_inbound_flights)\n",
1598 "test_df['Outbound_Exist'] = test_df['Outbound_Arrival_Airport'].apply(map_outbound_flights)\n",
1599 "test_df['Is_Inbound_KDT'] = test_df['Inbound_Departure_Airport'].apply(is_inbound_kdt)\n",
1600 "test_df['Is_Outbound_KDT'] = test_df['Outbound_Arrival_Airport'].apply(is_outbound_kdt)\n",
1601 "test_df['Different_Operation_Airport'] = test_df['Operation_Airport'].apply(is_op_airport_different)\n",
1602 "test_df['Mapped_Channels'] = test_df['Operation_Channel'].apply(map_operation_channels)\n",
1603 "\n",
1604 "test_df['Cabin_Class'] = test_df['Cabin_Class'].map({'Y': 0, 'C': 1})\n",
1605 "\n",
1606 "onehot_gender = pd.get_dummies(test_df['Passenger_Gender'])\n",
1607 "test_df = pd.concat([test_df, onehot_gender], axis=1)\n",
1608 "\n",
1609 "onehot_channels = pd.get_dummies(test_df['Mapped_Channels'])\n",
1610 "test_df = pd.concat([test_df, onehot_channels], axis=1)\n",
1611 "\n",
1612 "test_df['Departure_YMD_LMT'] = pd.to_datetime(test_df['Departure_YMD_LMT'], format='%Y%m%d')\n",
1613 "test_df['Operation_YMD_LMT'] = pd.to_datetime(test_df['Operation_YMD_LMT'], format='%Y%m%d')\n",
1614 "test_df['checkin_fly_difference'] = test_df['Departure_YMD_LMT'] - test_df['Operation_YMD_LMT']\n",
1615 "\n",
1616 "processed_test_df = test_df.drop(['Departure_YMD_LMT', 'Operation_YMD_LMT','Departure_Airport', 'Operation_Airport', 'Terminal_Number', 'Terminal_Name',\n",
1617 " 'Operation_Initials', 'Operation_Sonic_Code', 'Operation_Channel','Passenger_Title', \n",
1618 " 'Passenger_Gender', 'Inbound_Departure_Airport','Outbound_Arrival_Airport'], axis=1)"
1619 ]
1620 },
1621 {
1622 "cell_type": "code",
1623 "execution_count": 52,
1624 "metadata": {},
1625 "outputs": [
1626 {
1627 "data": {
1628 "text/html": [
1629 "<div>\n",
1630 "<style scoped>\n",
1631 " .dataframe tbody tr th:only-of-type {\n",
1632 " vertical-align: middle;\n",
1633 " }\n",
1634 "\n",
1635 " .dataframe tbody tr th {\n",
1636 " vertical-align: top;\n",
1637 " }\n",
1638 "\n",
1639 " .dataframe thead th {\n",
1640 " text-align: right;\n",
1641 " }\n",
1642 "</style>\n",
1643 "<table border=\"1\" class=\"dataframe\">\n",
1644 " <thead>\n",
1645 " <tr style=\"text-align: right;\">\n",
1646 " <th></th>\n",
1647 " <th>SWC_FLY</th>\n",
1648 " <th>Cabin_Class</th>\n",
1649 " <th>SWC_FQTV_Member</th>\n",
1650 " <th>Passenger_Baggage_Count</th>\n",
1651 " <th>Passenger_Baggage_Weight</th>\n",
1652 " <th>SWC_Staff</th>\n",
1653 " <th>SWC_CIP_Passenger</th>\n",
1654 " <th>SWC_VIP_Passenger</th>\n",
1655 " <th>SWC_Has_Infant</th>\n",
1656 " <th>Operation_Count</th>\n",
1657 " <th>...</th>\n",
1658 " <th>?</th>\n",
1659 " <th>C</th>\n",
1660 " <th>F</th>\n",
1661 " <th>M</th>\n",
1662 " <th>kiosk</th>\n",
1663 " <th>kontuar</th>\n",
1664 " <th>mobile</th>\n",
1665 " <th>online</th>\n",
1666 " <th>other</th>\n",
1667 " <th>checkin_fly_difference</th>\n",
1668 " </tr>\n",
1669 " </thead>\n",
1670 " <tbody>\n",
1671 " <tr>\n",
1672 " <th>0</th>\n",
1673 " <td>1</td>\n",
1674 " <td>0</td>\n",
1675 " <td>1</td>\n",
1676 " <td>1</td>\n",
1677 " <td>17</td>\n",
1678 " <td>0</td>\n",
1679 " <td>0</td>\n",
1680 " <td>0</td>\n",
1681 " <td>0</td>\n",
1682 " <td></td>\n",
1683 " <td>...</td>\n",
1684 " <td>0</td>\n",
1685 " <td>0</td>\n",
1686 " <td>1</td>\n",
1687 " <td>0</td>\n",
1688 " <td>0</td>\n",
1689 " <td>0</td>\n",
1690 " <td>1</td>\n",
1691 " <td>0</td>\n",
1692 " <td>0</td>\n",
1693 " <td>1 days</td>\n",
1694 " </tr>\n",
1695 " <tr>\n",
1696 " <th>1</th>\n",
1697 " <td>1</td>\n",
1698 " <td>0</td>\n",
1699 " <td>0</td>\n",
1700 " <td>1</td>\n",
1701 " <td>7</td>\n",
1702 " <td>0</td>\n",
1703 " <td>0</td>\n",
1704 " <td>0</td>\n",
1705 " <td>0</td>\n",
1706 " <td></td>\n",
1707 " <td>...</td>\n",
1708 " <td>0</td>\n",
1709 " <td>0</td>\n",
1710 " <td>0</td>\n",
1711 " <td>1</td>\n",
1712 " <td>0</td>\n",
1713 " <td>1</td>\n",
1714 " <td>0</td>\n",
1715 " <td>0</td>\n",
1716 " <td>0</td>\n",
1717 " <td>0 days</td>\n",
1718 " </tr>\n",
1719 " <tr>\n",
1720 " <th>2</th>\n",
1721 " <td>1</td>\n",
1722 " <td>0</td>\n",
1723 " <td>1</td>\n",
1724 " <td>0</td>\n",
1725 " <td>0</td>\n",
1726 " <td>0</td>\n",
1727 " <td>0</td>\n",
1728 " <td>0</td>\n",
1729 " <td>0</td>\n",
1730 " <td></td>\n",
1731 " <td>...</td>\n",
1732 " <td>0</td>\n",
1733 " <td>0</td>\n",
1734 " <td>0</td>\n",
1735 " <td>1</td>\n",
1736 " <td>0</td>\n",
1737 " <td>1</td>\n",
1738 " <td>0</td>\n",
1739 " <td>0</td>\n",
1740 " <td>0</td>\n",
1741 " <td>0 days</td>\n",
1742 " </tr>\n",
1743 " <tr>\n",
1744 " <th>3</th>\n",
1745 " <td>1</td>\n",
1746 " <td>0</td>\n",
1747 " <td>1</td>\n",
1748 " <td>0</td>\n",
1749 " <td>0</td>\n",
1750 " <td>0</td>\n",
1751 " <td>1</td>\n",
1752 " <td>0</td>\n",
1753 " <td>0</td>\n",
1754 " <td></td>\n",
1755 " <td>...</td>\n",
1756 " <td>0</td>\n",
1757 " <td>0</td>\n",
1758 " <td>0</td>\n",
1759 " <td>1</td>\n",
1760 " <td>0</td>\n",
1761 " <td>0</td>\n",
1762 " <td>0</td>\n",
1763 " <td>1</td>\n",
1764 " <td>0</td>\n",
1765 " <td>1 days</td>\n",
1766 " </tr>\n",
1767 " <tr>\n",
1768 " <th>4</th>\n",
1769 " <td>1</td>\n",
1770 " <td>0</td>\n",
1771 " <td>0</td>\n",
1772 " <td>1</td>\n",
1773 " <td>18</td>\n",
1774 " <td>0</td>\n",
1775 " <td>0</td>\n",
1776 " <td>0</td>\n",
1777 " <td>0</td>\n",
1778 " <td></td>\n",
1779 " <td>...</td>\n",
1780 " <td>0</td>\n",
1781 " <td>0</td>\n",
1782 " <td>1</td>\n",
1783 " <td>0</td>\n",
1784 " <td>0</td>\n",
1785 " <td>0</td>\n",
1786 " <td>0</td>\n",
1787 " <td>1</td>\n",
1788 " <td>0</td>\n",
1789 " <td>1 days</td>\n",
1790 " </tr>\n",
1791 " </tbody>\n",
1792 "</table>\n",
1793 "<p>5 rows × 26 columns</p>\n",
1794 "</div>"
1795 ],
1796 "text/plain": [
1797 " SWC_FLY Cabin_Class SWC_FQTV_Member Passenger_Baggage_Count \\\n",
1798 "0 1 0 1 1 \n",
1799 "1 1 0 0 1 \n",
1800 "2 1 0 1 0 \n",
1801 "3 1 0 1 0 \n",
1802 "4 1 0 0 1 \n",
1803 "\n",
1804 " Passenger_Baggage_Weight SWC_Staff SWC_CIP_Passenger SWC_VIP_Passenger \\\n",
1805 "0 17 0 0 0 \n",
1806 "1 7 0 0 0 \n",
1807 "2 0 0 0 0 \n",
1808 "3 0 0 1 0 \n",
1809 "4 18 0 0 0 \n",
1810 "\n",
1811 " SWC_Has_Infant Operation_Count ... ? C F M kiosk \\\n",
1812 "0 0 ... 0 0 1 0 0 \n",
1813 "1 0 ... 0 0 0 1 0 \n",
1814 "2 0 ... 0 0 0 1 0 \n",
1815 "3 0 ... 0 0 0 1 0 \n",
1816 "4 0 ... 0 0 1 0 0 \n",
1817 "\n",
1818 " kontuar mobile online other checkin_fly_difference \n",
1819 "0 0 1 0 0 1 days \n",
1820 "1 1 0 0 0 0 days \n",
1821 "2 1 0 0 0 0 days \n",
1822 "3 0 0 1 0 1 days \n",
1823 "4 0 0 1 0 1 days \n",
1824 "\n",
1825 "[5 rows x 26 columns]"
1826 ]
1827 },
1828 "execution_count": 52,
1829 "metadata": {},
1830 "output_type": "execute_result"
1831 }
1832 ],
1833 "source": [
1834 "processed_test_df.head()"
1835 ]
1836 },
1837 {
1838 "cell_type": "code",
1839 "execution_count": 53,
1840 "metadata": {},
1841 "outputs": [],
1842 "source": [
1843 "processed_test_df = processed_test_df.drop(['Mapped_Channels'], axis = 1)\n",
1844 "# Converting timedelta typed feature to int\n",
1845 "processed_test_df['checkin_fly_difference'] = processed_test_df['checkin_fly_difference'].dt.days"
1846 ]
1847 },
1848 {
1849 "cell_type": "code",
1850 "execution_count": 57,
1851 "metadata": {},
1852 "outputs": [
1853 {
1854 "data": {
1855 "text/plain": [
1856 "SWC_FLY int64\n",
1857 "Cabin_Class int64\n",
1858 "SWC_FQTV_Member int64\n",
1859 "Passenger_Baggage_Count int64\n",
1860 "Passenger_Baggage_Weight int64\n",
1861 "SWC_Staff int64\n",
1862 "SWC_CIP_Passenger int64\n",
1863 "SWC_VIP_Passenger int64\n",
1864 "SWC_Has_Infant int64\n",
1865 "Operation_Count object\n",
1866 "Inbound_Exist int64\n",
1867 "Outbound_Exist int64\n",
1868 "Is_Inbound_KDT int64\n",
1869 "Is_Outbound_KDT int64\n",
1870 "Different_Operation_Airport int64\n",
1871 "? uint8\n",
1872 "C uint8\n",
1873 "F uint8\n",
1874 "M uint8\n",
1875 "kiosk uint8\n",
1876 "kontuar uint8\n",
1877 "mobile uint8\n",
1878 "online uint8\n",
1879 "other uint8\n",
1880 "checkin_fly_difference int64\n",
1881 "dtype: object"
1882 ]
1883 },
1884 "execution_count": 57,
1885 "metadata": {},
1886 "output_type": "execute_result"
1887 }
1888 ],
1889 "source": [
1890 "processed_test_df.dtypes"
1891 ]
1892 },
1893 {
1894 "cell_type": "markdown",
1895 "metadata": {},
1896 "source": [
1897 "It seems some of the one-hot encoded features (C/INF, M/INF and F/INF) are missing from processed_test_df. Adding those in the cell below."
1898 ]
1899 },
1900 {
1901 "cell_type": "code",
1902 "execution_count": 59,
1903 "metadata": {},
1904 "outputs": [],
1905 "source": [
1906 "processed_test_df['C/INF'] = 0\n",
1907 "processed_test_df['M/INF'] = 0\n",
1908 "processed_test_df['F/INF'] = 0"
1909 ]
1910 },
1911 {
1912 "cell_type": "code",
1913 "execution_count": 60,
1914 "metadata": {},
1915 "outputs": [
1916 {
1917 "data": {
1918 "text/html": [
1919 "<div>\n",
1920 "<style scoped>\n",
1921 " .dataframe tbody tr th:only-of-type {\n",
1922 " vertical-align: middle;\n",
1923 " }\n",
1924 "\n",
1925 " .dataframe tbody tr th {\n",
1926 " vertical-align: top;\n",
1927 " }\n",
1928 "\n",
1929 " .dataframe thead th {\n",
1930 " text-align: right;\n",
1931 " }\n",
1932 "</style>\n",
1933 "<table border=\"1\" class=\"dataframe\">\n",
1934 " <thead>\n",
1935 " <tr style=\"text-align: right;\">\n",
1936 " <th></th>\n",
1937 " <th>SWC_FLY</th>\n",
1938 " <th>Cabin_Class</th>\n",
1939 " <th>SWC_FQTV_Member</th>\n",
1940 " <th>Passenger_Baggage_Count</th>\n",
1941 " <th>Passenger_Baggage_Weight</th>\n",
1942 " <th>SWC_Staff</th>\n",
1943 " <th>SWC_CIP_Passenger</th>\n",
1944 " <th>SWC_VIP_Passenger</th>\n",
1945 " <th>SWC_Has_Infant</th>\n",
1946 " <th>Operation_Count</th>\n",
1947 " <th>...</th>\n",
1948 " <th>M</th>\n",
1949 " <th>kiosk</th>\n",
1950 " <th>kontuar</th>\n",
1951 " <th>mobile</th>\n",
1952 " <th>online</th>\n",
1953 " <th>other</th>\n",
1954 " <th>checkin_fly_difference</th>\n",
1955 " <th>C/INF</th>\n",
1956 " <th>M/INF</th>\n",
1957 " <th>F/INF</th>\n",
1958 " </tr>\n",
1959 " </thead>\n",
1960 " <tbody>\n",
1961 " <tr>\n",
1962 " <th>0</th>\n",
1963 " <td>1</td>\n",
1964 " <td>0</td>\n",
1965 " <td>1</td>\n",
1966 " <td>1</td>\n",
1967 " <td>17</td>\n",
1968 " <td>0</td>\n",
1969 " <td>0</td>\n",
1970 " <td>0</td>\n",
1971 " <td>0</td>\n",
1972 " <td></td>\n",
1973 " <td>...</td>\n",
1974 " <td>0</td>\n",
1975 " <td>0</td>\n",
1976 " <td>0</td>\n",
1977 " <td>1</td>\n",
1978 " <td>0</td>\n",
1979 " <td>0</td>\n",
1980 " <td>1</td>\n",
1981 " <td>0</td>\n",
1982 " <td>0</td>\n",
1983 " <td>0</td>\n",
1984 " </tr>\n",
1985 " <tr>\n",
1986 " <th>1</th>\n",
1987 " <td>1</td>\n",
1988 " <td>0</td>\n",
1989 " <td>0</td>\n",
1990 " <td>1</td>\n",
1991 " <td>7</td>\n",
1992 " <td>0</td>\n",
1993 " <td>0</td>\n",
1994 " <td>0</td>\n",
1995 " <td>0</td>\n",
1996 " <td></td>\n",
1997 " <td>...</td>\n",
1998 " <td>1</td>\n",
1999 " <td>0</td>\n",
2000 " <td>1</td>\n",
2001 " <td>0</td>\n",
2002 " <td>0</td>\n",
2003 " <td>0</td>\n",
2004 " <td>0</td>\n",
2005 " <td>0</td>\n",
2006 " <td>0</td>\n",
2007 " <td>0</td>\n",
2008 " </tr>\n",
2009 " <tr>\n",
2010 " <th>2</th>\n",
2011 " <td>1</td>\n",
2012 " <td>0</td>\n",
2013 " <td>1</td>\n",
2014 " <td>0</td>\n",
2015 " <td>0</td>\n",
2016 " <td>0</td>\n",
2017 " <td>0</td>\n",
2018 " <td>0</td>\n",
2019 " <td>0</td>\n",
2020 " <td></td>\n",
2021 " <td>...</td>\n",
2022 " <td>1</td>\n",
2023 " <td>0</td>\n",
2024 " <td>1</td>\n",
2025 " <td>0</td>\n",
2026 " <td>0</td>\n",
2027 " <td>0</td>\n",
2028 " <td>0</td>\n",
2029 " <td>0</td>\n",
2030 " <td>0</td>\n",
2031 " <td>0</td>\n",
2032 " </tr>\n",
2033 " <tr>\n",
2034 " <th>3</th>\n",
2035 " <td>1</td>\n",
2036 " <td>0</td>\n",
2037 " <td>1</td>\n",
2038 " <td>0</td>\n",
2039 " <td>0</td>\n",
2040 " <td>0</td>\n",
2041 " <td>1</td>\n",
2042 " <td>0</td>\n",
2043 " <td>0</td>\n",
2044 " <td></td>\n",
2045 " <td>...</td>\n",
2046 " <td>1</td>\n",
2047 " <td>0</td>\n",
2048 " <td>0</td>\n",
2049 " <td>0</td>\n",
2050 " <td>1</td>\n",
2051 " <td>0</td>\n",
2052 " <td>1</td>\n",
2053 " <td>0</td>\n",
2054 " <td>0</td>\n",
2055 " <td>0</td>\n",
2056 " </tr>\n",
2057 " <tr>\n",
2058 " <th>4</th>\n",
2059 " <td>1</td>\n",
2060 " <td>0</td>\n",
2061 " <td>0</td>\n",
2062 " <td>1</td>\n",
2063 " <td>18</td>\n",
2064 " <td>0</td>\n",
2065 " <td>0</td>\n",
2066 " <td>0</td>\n",
2067 " <td>0</td>\n",
2068 " <td></td>\n",
2069 " <td>...</td>\n",
2070 " <td>0</td>\n",
2071 " <td>0</td>\n",
2072 " <td>0</td>\n",
2073 " <td>0</td>\n",
2074 " <td>1</td>\n",
2075 " <td>0</td>\n",
2076 " <td>1</td>\n",
2077 " <td>0</td>\n",
2078 " <td>0</td>\n",
2079 " <td>0</td>\n",
2080 " </tr>\n",
2081 " <tr>\n",
2082 " <th>5</th>\n",
2083 " <td>1</td>\n",
2084 " <td>0</td>\n",
2085 " <td>0</td>\n",
2086 " <td>2</td>\n",
2087 " <td>27</td>\n",
2088 " <td>0</td>\n",
2089 " <td>0</td>\n",
2090 " <td>0</td>\n",
2091 " <td>0</td>\n",
2092 " <td></td>\n",
2093 " <td>...</td>\n",
2094 " <td>1</td>\n",
2095 " <td>0</td>\n",
2096 " <td>1</td>\n",
2097 " <td>0</td>\n",
2098 " <td>0</td>\n",
2099 " <td>0</td>\n",
2100 " <td>0</td>\n",
2101 " <td>0</td>\n",
2102 " <td>0</td>\n",
2103 " <td>0</td>\n",
2104 " </tr>\n",
2105 " <tr>\n",
2106 " <th>6</th>\n",
2107 " <td>0</td>\n",
2108 " <td>1</td>\n",
2109 " <td>1</td>\n",
2110 " <td>0</td>\n",
2111 " <td>0</td>\n",
2112 " <td>0</td>\n",
2113 " <td>0</td>\n",
2114 " <td>0</td>\n",
2115 " <td>0</td>\n",
2116 " <td></td>\n",
2117 " <td>...</td>\n",
2118 " <td>1</td>\n",
2119 " <td>0</td>\n",
2120 " <td>0</td>\n",
2121 " <td>1</td>\n",
2122 " <td>0</td>\n",
2123 " <td>0</td>\n",
2124 " <td>1</td>\n",
2125 " <td>0</td>\n",
2126 " <td>0</td>\n",
2127 " <td>0</td>\n",
2128 " </tr>\n",
2129 " <tr>\n",
2130 " <th>7</th>\n",
2131 " <td>0</td>\n",
2132 " <td>0</td>\n",
2133 " <td>1</td>\n",
2134 " <td>0</td>\n",
2135 " <td>0</td>\n",
2136 " <td>0</td>\n",
2137 " <td>0</td>\n",
2138 " <td>0</td>\n",
2139 " <td>0</td>\n",
2140 " <td></td>\n",
2141 " <td>...</td>\n",
2142 " <td>0</td>\n",
2143 " <td>1</td>\n",
2144 " <td>0</td>\n",
2145 " <td>0</td>\n",
2146 " <td>0</td>\n",
2147 " <td>0</td>\n",
2148 " <td>0</td>\n",
2149 " <td>0</td>\n",
2150 " <td>0</td>\n",
2151 " <td>0</td>\n",
2152 " </tr>\n",
2153 " <tr>\n",
2154 " <th>8</th>\n",
2155 " <td>1</td>\n",
2156 " <td>0</td>\n",
2157 " <td>0</td>\n",
2158 " <td>0</td>\n",
2159 " <td>0</td>\n",
2160 " <td>0</td>\n",
2161 " <td>0</td>\n",
2162 " <td>0</td>\n",
2163 " <td>0</td>\n",
2164 " <td></td>\n",
2165 " <td>...</td>\n",
2166 " <td>0</td>\n",
2167 " <td>0</td>\n",
2168 " <td>1</td>\n",
2169 " <td>0</td>\n",
2170 " <td>0</td>\n",
2171 " <td>0</td>\n",
2172 " <td>0</td>\n",
2173 " <td>0</td>\n",
2174 " <td>0</td>\n",
2175 " <td>0</td>\n",
2176 " </tr>\n",
2177 " <tr>\n",
2178 " <th>9</th>\n",
2179 " <td>1</td>\n",
2180 " <td>1</td>\n",
2181 " <td>1</td>\n",
2182 " <td>1</td>\n",
2183 " <td>10</td>\n",
2184 " <td>0</td>\n",
2185 " <td>1</td>\n",
2186 " <td>0</td>\n",
2187 " <td>0</td>\n",
2188 " <td></td>\n",
2189 " <td>...</td>\n",
2190 " <td>0</td>\n",
2191 " <td>0</td>\n",
2192 " <td>1</td>\n",
2193 " <td>0</td>\n",
2194 " <td>0</td>\n",
2195 " <td>0</td>\n",
2196 " <td>0</td>\n",
2197 " <td>0</td>\n",
2198 " <td>0</td>\n",
2199 " <td>0</td>\n",
2200 " </tr>\n",
2201 " </tbody>\n",
2202 "</table>\n",
2203 "<p>10 rows × 28 columns</p>\n",
2204 "</div>"
2205 ],
2206 "text/plain": [
2207 " SWC_FLY Cabin_Class SWC_FQTV_Member Passenger_Baggage_Count \\\n",
2208 "0 1 0 1 1 \n",
2209 "1 1 0 0 1 \n",
2210 "2 1 0 1 0 \n",
2211 "3 1 0 1 0 \n",
2212 "4 1 0 0 1 \n",
2213 "5 1 0 0 2 \n",
2214 "6 0 1 1 0 \n",
2215 "7 0 0 1 0 \n",
2216 "8 1 0 0 0 \n",
2217 "9 1 1 1 1 \n",
2218 "\n",
2219 " Passenger_Baggage_Weight SWC_Staff SWC_CIP_Passenger SWC_VIP_Passenger \\\n",
2220 "0 17 0 0 0 \n",
2221 "1 7 0 0 0 \n",
2222 "2 0 0 0 0 \n",
2223 "3 0 0 1 0 \n",
2224 "4 18 0 0 0 \n",
2225 "5 27 0 0 0 \n",
2226 "6 0 0 0 0 \n",
2227 "7 0 0 0 0 \n",
2228 "8 0 0 0 0 \n",
2229 "9 10 0 1 0 \n",
2230 "\n",
2231 " SWC_Has_Infant Operation_Count ... M kiosk kontuar mobile online \\\n",
2232 "0 0 ... 0 0 0 1 0 \n",
2233 "1 0 ... 1 0 1 0 0 \n",
2234 "2 0 ... 1 0 1 0 0 \n",
2235 "3 0 ... 1 0 0 0 1 \n",
2236 "4 0 ... 0 0 0 0 1 \n",
2237 "5 0 ... 1 0 1 0 0 \n",
2238 "6 0 ... 1 0 0 1 0 \n",
2239 "7 0 ... 0 1 0 0 0 \n",
2240 "8 0 ... 0 0 1 0 0 \n",
2241 "9 0 ... 0 0 1 0 0 \n",
2242 "\n",
2243 " other checkin_fly_difference C/INF M/INF F/INF \n",
2244 "0 0 1 0 0 0 \n",
2245 "1 0 0 0 0 0 \n",
2246 "2 0 0 0 0 0 \n",
2247 "3 0 1 0 0 0 \n",
2248 "4 0 1 0 0 0 \n",
2249 "5 0 0 0 0 0 \n",
2250 "6 0 1 0 0 0 \n",
2251 "7 0 0 0 0 0 \n",
2252 "8 0 0 0 0 0 \n",
2253 "9 0 0 0 0 0 \n",
2254 "\n",
2255 "[10 rows x 28 columns]"
2256 ]
2257 },
2258 "execution_count": 60,
2259 "metadata": {},
2260 "output_type": "execute_result"
2261 }
2262 ],
2263 "source": [
2264 "processed_test_df.head(10)"
2265 ]
2266 },
2267 {
2268 "cell_type": "markdown",
2269 "metadata": {},
2270 "source": [
2271 "Applying the Random Forest model, which performed better than XGBoost, to the test data."
2272 ]
2273 },
2274 {
2275 "cell_type": "code",
2276 "execution_count": 61,
2277 "metadata": {},
2278 "outputs": [],
2279 "source": [
2280 "X_test_data = processed_test_df.drop(['Operation_Count'], axis = 1)"
2281 ]
2282 },
2283 {
2284 "cell_type": "code",
2285 "execution_count": 62,
2286 "metadata": {},
2287 "outputs": [],
2288 "source": [
2289 "final_predictions = rfmodel.predict(X_test_data)"
2290 ]
2291 },
2292 {
2293 "cell_type": "markdown",
2294 "metadata": {},
2295 "source": [
2296 "Since the final predictions must be integers, we round the floating-point predictions produced by the Random Forest Regressor to the nearest integer."
2297 ]
2298 },
2299 {
2300 "cell_type": "code",
2301 "execution_count": 67,
2302 "metadata": {},
2303 "outputs": [],
2304 "source": [
2305 "rounded_list = []\n",
2306 "for pred in final_predictions:\n",
2307 " pred = int(round(pred))\n",
2308 " rounded_list.append(pred)"
2309 ]
2310 },
2311 {
2312 "cell_type": "code",
2313 "execution_count": 68,
2314 "metadata": {},
2315 "outputs": [],
2316 "source": [
2317 "processed_test_df['Operation_Count'] = rounded_list"
2318 ]
2319 },
2320 {
2321 "cell_type": "code",
2322 "execution_count": 69,
2323 "metadata": {},
2324 "outputs": [
2325 {
2326 "data": {
2327 "text/html": [
2328 "<div>\n",
2329 "<style scoped>\n",
2330 " .dataframe tbody tr th:only-of-type {\n",
2331 " vertical-align: middle;\n",
2332 " }\n",
2333 "\n",
2334 " .dataframe tbody tr th {\n",
2335 " vertical-align: top;\n",
2336 " }\n",
2337 "\n",
2338 " .dataframe thead th {\n",
2339 " text-align: right;\n",
2340 " }\n",
2341 "</style>\n",
2342 "<table border=\"1\" class=\"dataframe\">\n",
2343 " <thead>\n",
2344 " <tr style=\"text-align: right;\">\n",
2345 " <th></th>\n",
2346 " <th>SWC_FLY</th>\n",
2347 " <th>Cabin_Class</th>\n",
2348 " <th>SWC_FQTV_Member</th>\n",
2349 " <th>Passenger_Baggage_Count</th>\n",
2350 " <th>Passenger_Baggage_Weight</th>\n",
2351 " <th>SWC_Staff</th>\n",
2352 " <th>SWC_CIP_Passenger</th>\n",
2353 " <th>SWC_VIP_Passenger</th>\n",
2354 " <th>SWC_Has_Infant</th>\n",
2355 " <th>Operation_Count</th>\n",
2356 " <th>...</th>\n",
2357 " <th>M</th>\n",
2358 " <th>kiosk</th>\n",
2359 " <th>kontuar</th>\n",
2360 " <th>mobile</th>\n",
2361 " <th>online</th>\n",
2362 " <th>other</th>\n",
2363 " <th>checkin_fly_difference</th>\n",
2364 " <th>C/INF</th>\n",
2365 " <th>M/INF</th>\n",
2366 " <th>F/INF</th>\n",
2367 " </tr>\n",
2368 " </thead>\n",
2369 " <tbody>\n",
2370 " <tr>\n",
2371 " <th>0</th>\n",
2372 " <td>1</td>\n",
2373 " <td>0</td>\n",
2374 " <td>1</td>\n",
2375 " <td>1</td>\n",
2376 " <td>17</td>\n",
2377 " <td>0</td>\n",
2378 " <td>0</td>\n",
2379 " <td>0</td>\n",
2380 " <td>0</td>\n",
2381 " <td>1</td>\n",
2382 " <td>...</td>\n",
2383 " <td>0</td>\n",
2384 " <td>0</td>\n",
2385 " <td>0</td>\n",
2386 " <td>1</td>\n",
2387 " <td>0</td>\n",
2388 " <td>0</td>\n",
2389 " <td>1</td>\n",
2390 " <td>0</td>\n",
2391 " <td>0</td>\n",
2392 " <td>0</td>\n",
2393 " </tr>\n",
2394 " <tr>\n",
2395 " <th>1</th>\n",
2396 " <td>1</td>\n",
2397 " <td>0</td>\n",
2398 " <td>0</td>\n",
2399 " <td>1</td>\n",
2400 " <td>7</td>\n",
2401 " <td>0</td>\n",
2402 " <td>0</td>\n",
2403 " <td>0</td>\n",
2404 " <td>0</td>\n",
2405 " <td>1</td>\n",
2406 " <td>...</td>\n",
2407 " <td>1</td>\n",
2408 " <td>0</td>\n",
2409 " <td>1</td>\n",
2410 " <td>0</td>\n",
2411 " <td>0</td>\n",
2412 " <td>0</td>\n",
2413 " <td>0</td>\n",
2414 " <td>0</td>\n",
2415 " <td>0</td>\n",
2416 " <td>0</td>\n",
2417 " </tr>\n",
2418 " <tr>\n",
2419 " <th>2</th>\n",
2420 " <td>1</td>\n",
2421 " <td>0</td>\n",
2422 " <td>1</td>\n",
2423 " <td>0</td>\n",
2424 " <td>0</td>\n",
2425 " <td>0</td>\n",
2426 " <td>0</td>\n",
2427 " <td>0</td>\n",
2428 " <td>0</td>\n",
2429 " <td>1</td>\n",
2430 " <td>...</td>\n",
2431 " <td>1</td>\n",
2432 " <td>0</td>\n",
2433 " <td>1</td>\n",
2434 " <td>0</td>\n",
2435 " <td>0</td>\n",
2436 " <td>0</td>\n",
2437 " <td>0</td>\n",
2438 " <td>0</td>\n",
2439 " <td>0</td>\n",
2440 " <td>0</td>\n",
2441 " </tr>\n",
2442 " <tr>\n",
2443 " <th>3</th>\n",
2444 " <td>1</td>\n",
2445 " <td>0</td>\n",
2446 " <td>1</td>\n",
2447 " <td>0</td>\n",
2448 " <td>0</td>\n",
2449 " <td>0</td>\n",
2450 " <td>1</td>\n",
2451 " <td>0</td>\n",
2452 " <td>0</td>\n",
2453 " <td>1</td>\n",
2454 " <td>...</td>\n",
2455 " <td>1</td>\n",
2456 " <td>0</td>\n",
2457 " <td>0</td>\n",
2458 " <td>0</td>\n",
2459 " <td>1</td>\n",
2460 " <td>0</td>\n",
2461 " <td>1</td>\n",
2462 " <td>0</td>\n",
2463 " <td>0</td>\n",
2464 " <td>0</td>\n",
2465 " </tr>\n",
2466 " <tr>\n",
2467 " <th>4</th>\n",
2468 " <td>1</td>\n",
2469 " <td>0</td>\n",
2470 " <td>0</td>\n",
2471 " <td>1</td>\n",
2472 " <td>18</td>\n",
2473 " <td>0</td>\n",
2474 " <td>0</td>\n",
2475 " <td>0</td>\n",
2476 " <td>0</td>\n",
2477 " <td>1</td>\n",
2478 " <td>...</td>\n",
2479 " <td>0</td>\n",
2480 " <td>0</td>\n",
2481 " <td>0</td>\n",
2482 " <td>0</td>\n",
2483 " <td>1</td>\n",
2484 " <td>0</td>\n",
2485 " <td>1</td>\n",
2486 " <td>0</td>\n",
2487 " <td>0</td>\n",
2488 " <td>0</td>\n",
2489 " </tr>\n",
2490 " <tr>\n",
2491 " <th>5</th>\n",
2492 " <td>1</td>\n",
2493 " <td>0</td>\n",
2494 " <td>0</td>\n",
2495 " <td>2</td>\n",
2496 " <td>27</td>\n",
2497 " <td>0</td>\n",
2498 " <td>0</td>\n",
2499 " <td>0</td>\n",
2500 " <td>0</td>\n",
2501 " <td>1</td>\n",
2502 " <td>...</td>\n",
2503 " <td>1</td>\n",
2504 " <td>0</td>\n",
2505 " <td>1</td>\n",
2506 " <td>0</td>\n",
2507 " <td>0</td>\n",
2508 " <td>0</td>\n",
2509 " <td>0</td>\n",
2510 " <td>0</td>\n",
2511 " <td>0</td>\n",
2512 " <td>0</td>\n",
2513 " </tr>\n",
2514 " <tr>\n",
2515 " <th>6</th>\n",
2516 " <td>0</td>\n",
2517 " <td>1</td>\n",
2518 " <td>1</td>\n",
2519 " <td>0</td>\n",
2520 " <td>0</td>\n",
2521 " <td>0</td>\n",
2522 " <td>0</td>\n",
2523 " <td>0</td>\n",
2524 " <td>0</td>\n",
2525 " <td>1</td>\n",
2526 " <td>...</td>\n",
2527 " <td>1</td>\n",
2528 " <td>0</td>\n",
2529 " <td>0</td>\n",
2530 " <td>1</td>\n",
2531 " <td>0</td>\n",
2532 " <td>0</td>\n",
2533 " <td>1</td>\n",
2534 " <td>0</td>\n",
2535 " <td>0</td>\n",
2536 " <td>0</td>\n",
2537 " </tr>\n",
2538 " <tr>\n",
2539 " <th>7</th>\n",
2540 " <td>0</td>\n",
2541 " <td>0</td>\n",
2542 " <td>1</td>\n",
2543 " <td>0</td>\n",
2544 " <td>0</td>\n",
2545 " <td>0</td>\n",
2546 " <td>0</td>\n",
2547 " <td>0</td>\n",
2548 " <td>0</td>\n",
2549 " <td>1</td>\n",
2550 " <td>...</td>\n",
2551 " <td>0</td>\n",
2552 " <td>1</td>\n",
2553 " <td>0</td>\n",
2554 " <td>0</td>\n",
2555 " <td>0</td>\n",
2556 " <td>0</td>\n",
2557 " <td>0</td>\n",
2558 " <td>0</td>\n",
2559 " <td>0</td>\n",
2560 " <td>0</td>\n",
2561 " </tr>\n",
2562 " <tr>\n",
2563 " <th>8</th>\n",
2564 " <td>1</td>\n",
2565 " <td>0</td>\n",
2566 " <td>0</td>\n",
2567 " <td>0</td>\n",
2568 " <td>0</td>\n",
2569 " <td>0</td>\n",
2570 " <td>0</td>\n",
2571 " <td>0</td>\n",
2572 " <td>0</td>\n",
2573 " <td>6</td>\n",
2574 " <td>...</td>\n",
2575 " <td>0</td>\n",
2576 " <td>0</td>\n",
2577 " <td>1</td>\n",
2578 " <td>0</td>\n",
2579 " <td>0</td>\n",
2580 " <td>0</td>\n",
2581 " <td>0</td>\n",
2582 " <td>0</td>\n",
2583 " <td>0</td>\n",
2584 " <td>0</td>\n",
2585 " </tr>\n",
2586 " <tr>\n",
2587 " <th>9</th>\n",
2588 " <td>1</td>\n",
2589 " <td>1</td>\n",
2590 " <td>1</td>\n",
2591 " <td>1</td>\n",
2592 " <td>10</td>\n",
2593 " <td>0</td>\n",
2594 " <td>1</td>\n",
2595 " <td>0</td>\n",
2596 " <td>0</td>\n",
2597 " <td>1</td>\n",
2598 " <td>...</td>\n",
2599 " <td>0</td>\n",
2600 " <td>0</td>\n",
2601 " <td>1</td>\n",
2602 " <td>0</td>\n",
2603 " <td>0</td>\n",
2604 " <td>0</td>\n",
2605 " <td>0</td>\n",
2606 " <td>0</td>\n",
2607 " <td>0</td>\n",
2608 " <td>0</td>\n",
2609 " </tr>\n",
2610 " <tr>\n",
2611 " <th>10</th>\n",
2612 " <td>1</td>\n",
2613 " <td>0</td>\n",
2614 " <td>0</td>\n",
2615 " <td>3</td>\n",
2616 " <td>34</td>\n",
2617 " <td>0</td>\n",
2618 " <td>0</td>\n",
2619 " <td>0</td>\n",
2620 " <td>0</td>\n",
2621 " <td>1</td>\n",
2622 " <td>...</td>\n",
2623 " <td>0</td>\n",
2624 " <td>0</td>\n",
2625 " <td>1</td>\n",
2626 " <td>0</td>\n",
2627 " <td>0</td>\n",
2628 " <td>0</td>\n",
2629 " <td>0</td>\n",
2630 " <td>0</td>\n",
2631 " <td>0</td>\n",
2632 " <td>0</td>\n",
2633 " </tr>\n",
2634 " <tr>\n",
2635 " <th>11</th>\n",
2636 " <td>1</td>\n",
2637 " <td>0</td>\n",
2638 " <td>0</td>\n",
2639 " <td>2</td>\n",
2640 " <td>30</td>\n",
2641 " <td>0</td>\n",
2642 " <td>0</td>\n",
2643 " <td>0</td>\n",
2644 " <td>0</td>\n",
2645 " <td>1</td>\n",
2646 " <td>...</td>\n",
2647 " <td>0</td>\n",
2648 " <td>0</td>\n",
2649 " <td>1</td>\n",
2650 " <td>0</td>\n",
2651 " <td>0</td>\n",
2652 " <td>0</td>\n",
2653 " <td>0</td>\n",
2654 " <td>0</td>\n",
2655 " <td>0</td>\n",
2656 " <td>0</td>\n",
2657 " </tr>\n",
2658 " <tr>\n",
2659 " <th>12</th>\n",
2660 " <td>1</td>\n",
2661 " <td>0</td>\n",
2662 " <td>1</td>\n",
2663 " <td>1</td>\n",
2664 " <td>9</td>\n",
2665 " <td>0</td>\n",
2666 " <td>0</td>\n",
2667 " <td>0</td>\n",
2668 " <td>0</td>\n",
2669 " <td>1</td>\n",
2670 " <td>...</td>\n",
2671 " <td>0</td>\n",
2672 " <td>0</td>\n",
2673 " <td>0</td>\n",
2674 " <td>0</td>\n",
2675 " <td>1</td>\n",
2676 " <td>0</td>\n",
2677 " <td>1</td>\n",
2678 " <td>0</td>\n",
2679 " <td>0</td>\n",
2680 " <td>0</td>\n",
2681 " </tr>\n",
2682 " <tr>\n",
2683 " <th>13</th>\n",
2684 " <td>1</td>\n",
2685 " <td>0</td>\n",
2686 " <td>1</td>\n",
2687 " <td>1</td>\n",
2688 " <td>7</td>\n",
2689 " <td>0</td>\n",
2690 " <td>1</td>\n",
2691 " <td>0</td>\n",
2692 " <td>0</td>\n",
2693 " <td>1</td>\n",
2694 " <td>...</td>\n",
2695 " <td>0</td>\n",
2696 " <td>0</td>\n",
2697 " <td>0</td>\n",
2698 " <td>0</td>\n",
2699 " <td>1</td>\n",
2700 " <td>0</td>\n",
2701 " <td>1</td>\n",
2702 " <td>0</td>\n",
2703 " <td>0</td>\n",
2704 " <td>0</td>\n",
2705 " </tr>\n",
2706 " <tr>\n",
2707 " <th>14</th>\n",
2708 " <td>1</td>\n",
2709 " <td>0</td>\n",
2710 " <td>1</td>\n",
2711 " <td>0</td>\n",
2712 " <td>0</td>\n",
2713 " <td>0</td>\n",
2714 " <td>0</td>\n",
2715 " <td>0</td>\n",
2716 " <td>0</td>\n",
2717 " <td>2</td>\n",
2718 " <td>...</td>\n",
2719 " <td>1</td>\n",
2720 " <td>0</td>\n",
2721 " <td>0</td>\n",
2722 " <td>1</td>\n",
2723 " <td>0</td>\n",
2724 " <td>0</td>\n",
2725 " <td>1</td>\n",
2726 " <td>0</td>\n",
2727 " <td>0</td>\n",
2728 " <td>0</td>\n",
2729 " </tr>\n",
2730 " <tr>\n",
2731 " <th>15</th>\n",
2732 " <td>1</td>\n",
2733 " <td>0</td>\n",
2734 " <td>0</td>\n",
2735 " <td>1</td>\n",
2736 " <td>14</td>\n",
2737 " <td>0</td>\n",
2738 " <td>0</td>\n",
2739 " <td>0</td>\n",
2740 " <td>0</td>\n",
2741 " <td>1</td>\n",
2742 " <td>...</td>\n",
2743 " <td>1</td>\n",
2744 " <td>1</td>\n",
2745 " <td>0</td>\n",
2746 " <td>0</td>\n",
2747 " <td>0</td>\n",
2748 " <td>0</td>\n",
2749 " <td>0</td>\n",
2750 " <td>0</td>\n",
2751 " <td>0</td>\n",
2752 " <td>0</td>\n",
2753 " </tr>\n",
2754 " <tr>\n",
2755 " <th>16</th>\n",
2756 " <td>1</td>\n",
2757 " <td>0</td>\n",
2758 " <td>1</td>\n",
2759 " <td>2</td>\n",
2760 " <td>26</td>\n",
2761 " <td>0</td>\n",
2762 " <td>0</td>\n",
2763 " <td>0</td>\n",
2764 " <td>0</td>\n",
2765 " <td>1</td>\n",
2766 " <td>...</td>\n",
2767 " <td>0</td>\n",
2768 " <td>0</td>\n",
2769 " <td>0</td>\n",
2770 " <td>1</td>\n",
2771 " <td>0</td>\n",
2772 " <td>0</td>\n",
2773 " <td>0</td>\n",
2774 " <td>0</td>\n",
2775 " <td>0</td>\n",
2776 " <td>0</td>\n",
2777 " </tr>\n",
2778 " <tr>\n",
2779 " <th>17</th>\n",
2780 " <td>1</td>\n",
2781 " <td>0</td>\n",
2782 " <td>0</td>\n",
2783 " <td>0</td>\n",
2784 " <td>0</td>\n",
2785 " <td>0</td>\n",
2786 " <td>0</td>\n",
2787 " <td>0</td>\n",
2788 " <td>0</td>\n",
2789 " <td>1</td>\n",
2790 " <td>...</td>\n",
2791 " <td>1</td>\n",
2792 " <td>0</td>\n",
2793 " <td>1</td>\n",
2794 " <td>0</td>\n",
2795 " <td>0</td>\n",
2796 " <td>0</td>\n",
2797 " <td>0</td>\n",
2798 " <td>0</td>\n",
2799 " <td>0</td>\n",
2800 " <td>0</td>\n",
2801 " </tr>\n",
2802 " <tr>\n",
2803 " <th>18</th>\n",
2804 " <td>1</td>\n",
2805 " <td>0</td>\n",
2806 " <td>0</td>\n",
2807 " <td>0</td>\n",
2808 " <td>0</td>\n",
2809 " <td>0</td>\n",
2810 " <td>0</td>\n",
2811 " <td>0</td>\n",
2812 " <td>0</td>\n",
2813 " <td>6</td>\n",
2814 " <td>...</td>\n",
2815 " <td>0</td>\n",
2816 " <td>0</td>\n",
2817 " <td>1</td>\n",
2818 " <td>0</td>\n",
2819 " <td>0</td>\n",
2820 " <td>0</td>\n",
2821 " <td>0</td>\n",
2822 " <td>0</td>\n",
2823 " <td>0</td>\n",
2824 " <td>0</td>\n",
2825 " </tr>\n",
2826 " <tr>\n",
2827 " <th>19</th>\n",
2828 " <td>1</td>\n",
2829 " <td>0</td>\n",
2830 " <td>1</td>\n",
2831 " <td>6</td>\n",
2832 " <td>22</td>\n",
2833 " <td>0</td>\n",
2834 " <td>0</td>\n",
2835 " <td>0</td>\n",
2836 " <td>0</td>\n",
2837 " <td>1</td>\n",
2838 " <td>...</td>\n",
2839 " <td>1</td>\n",
2840 " <td>0</td>\n",
2841 " <td>1</td>\n",
2842 " <td>0</td>\n",
2843 " <td>0</td>\n",
2844 " <td>0</td>\n",
2845 " <td>0</td>\n",
2846 " <td>0</td>\n",
2847 " <td>0</td>\n",
2848 " <td>0</td>\n",
2849 " </tr>\n",
2850 " <tr>\n",
2851 " <th>20</th>\n",
2852 " <td>1</td>\n",
2853 " <td>0</td>\n",
2854 " <td>1</td>\n",
2855 " <td>0</td>\n",
2856 " <td>0</td>\n",
2857 " <td>0</td>\n",
2858 " <td>1</td>\n",
2859 " <td>0</td>\n",
2860 " <td>0</td>\n",
2861 " <td>1</td>\n",
2862 " <td>...</td>\n",
2863 " <td>1</td>\n",
2864 " <td>0</td>\n",
2865 " <td>1</td>\n",
2866 " <td>0</td>\n",
2867 " <td>0</td>\n",
2868 " <td>0</td>\n",
2869 " <td>0</td>\n",
2870 " <td>0</td>\n",
2871 " <td>0</td>\n",
2872 " <td>0</td>\n",
2873 " </tr>\n",
2874 " <tr>\n",
2875 " <th>21</th>\n",
2876 " <td>1</td>\n",
2877 " <td>0</td>\n",
2878 " <td>0</td>\n",
2879 " <td>0</td>\n",
2880 " <td>0</td>\n",
2881 " <td>0</td>\n",
2882 " <td>0</td>\n",
2883 " <td>0</td>\n",
2884 " <td>0</td>\n",
2885 " <td>1</td>\n",
2886 " <td>...</td>\n",
2887 " <td>1</td>\n",
2888 " <td>0</td>\n",
2889 " <td>1</td>\n",
2890 " <td>0</td>\n",
2891 " <td>0</td>\n",
2892 " <td>0</td>\n",
2893 " <td>1</td>\n",
2894 " <td>0</td>\n",
2895 " <td>0</td>\n",
2896 " <td>0</td>\n",
2897 " </tr>\n",
2898 " <tr>\n",
2899 " <th>22</th>\n",
2900 " <td>1</td>\n",
2901 " <td>0</td>\n",
2902 " <td>0</td>\n",
2903 " <td>2</td>\n",
2904 " <td>14</td>\n",
2905 " <td>0</td>\n",
2906 " <td>0</td>\n",
2907 " <td>0</td>\n",
2908 " <td>0</td>\n",
2909 " <td>1</td>\n",
2910 " <td>...</td>\n",
2911 " <td>1</td>\n",
2912 " <td>1</td>\n",
2913 " <td>0</td>\n",
2914 " <td>0</td>\n",
2915 " <td>0</td>\n",
2916 " <td>0</td>\n",
2917 " <td>0</td>\n",
2918 " <td>0</td>\n",
2919 " <td>0</td>\n",
2920 " <td>0</td>\n",
2921 " </tr>\n",
2922 " <tr>\n",
2923 " <th>23</th>\n",
2924 " <td>1</td>\n",
2925 " <td>0</td>\n",
2926 " <td>0</td>\n",
2927 " <td>0</td>\n",
2928 " <td>0</td>\n",
2929 " <td>0</td>\n",
2930 " <td>0</td>\n",
2931 " <td>0</td>\n",
2932 " <td>0</td>\n",
2933 " <td>3</td>\n",
2934 " <td>...</td>\n",
2935 " <td>1</td>\n",
2936 " <td>0</td>\n",
2937 " <td>0</td>\n",
2938 " <td>1</td>\n",
2939 " <td>0</td>\n",
2940 " <td>0</td>\n",
2941 " <td>0</td>\n",
2942 " <td>0</td>\n",
2943 " <td>0</td>\n",
2944 " <td>0</td>\n",
2945 " </tr>\n",
2946 " <tr>\n",
2947 " <th>24</th>\n",
2948 " <td>1</td>\n",
2949 " <td>0</td>\n",
2950 " <td>1</td>\n",
2951 " <td>0</td>\n",
2952 " <td>0</td>\n",
2953 " <td>0</td>\n",
2954 " <td>1</td>\n",
2955 " <td>0</td>\n",
2956 " <td>0</td>\n",
2957 " <td>1</td>\n",
2958 " <td>...</td>\n",
2959 " <td>1</td>\n",
2960 " <td>0</td>\n",
2961 " <td>0</td>\n",
2962 " <td>0</td>\n",
2963 " <td>1</td>\n",
2964 " <td>0</td>\n",
2965 " <td>1</td>\n",
2966 " <td>0</td>\n",
2967 " <td>0</td>\n",
2968 " <td>0</td>\n",
2969 " </tr>\n",
2970 " </tbody>\n",
2971 "</table>\n",
2972 "<p>25 rows × 28 columns</p>\n",
2973 "</div>"
2974 ],
2975 "text/plain": [
2976 " SWC_FLY Cabin_Class SWC_FQTV_Member Passenger_Baggage_Count \\\n",
2977 "0 1 0 1 1 \n",
2978 "1 1 0 0 1 \n",
2979 "2 1 0 1 0 \n",
2980 "3 1 0 1 0 \n",
2981 "4 1 0 0 1 \n",
2982 "5 1 0 0 2 \n",
2983 "6 0 1 1 0 \n",
2984 "7 0 0 1 0 \n",
2985 "8 1 0 0 0 \n",
2986 "9 1 1 1 1 \n",
2987 "10 1 0 0 3 \n",
2988 "11 1 0 0 2 \n",
2989 "12 1 0 1 1 \n",
2990 "13 1 0 1 1 \n",
2991 "14 1 0 1 0 \n",
2992 "15 1 0 0 1 \n",
2993 "16 1 0 1 2 \n",
2994 "17 1 0 0 0 \n",
2995 "18 1 0 0 0 \n",
2996 "19 1 0 1 6 \n",
2997 "20 1 0 1 0 \n",
2998 "21 1 0 0 0 \n",
2999 "22 1 0 0 2 \n",
3000 "23 1 0 0 0 \n",
3001 "24 1 0 1 0 \n",
3002 "\n",
3003 " Passenger_Baggage_Weight SWC_Staff SWC_CIP_Passenger SWC_VIP_Passenger \\\n",
3004 "0 17 0 0 0 \n",
3005 "1 7 0 0 0 \n",
3006 "2 0 0 0 0 \n",
3007 "3 0 0 1 0 \n",
3008 "4 18 0 0 0 \n",
3009 "5 27 0 0 0 \n",
3010 "6 0 0 0 0 \n",
3011 "7 0 0 0 0 \n",
3012 "8 0 0 0 0 \n",
3013 "9 10 0 1 0 \n",
3014 "10 34 0 0 0 \n",
3015 "11 30 0 0 0 \n",
3016 "12 9 0 0 0 \n",
3017 "13 7 0 1 0 \n",
3018 "14 0 0 0 0 \n",
3019 "15 14 0 0 0 \n",
3020 "16 26 0 0 0 \n",
3021 "17 0 0 0 0 \n",
3022 "18 0 0 0 0 \n",
3023 "19 22 0 0 0 \n",
3024 "20 0 0 1 0 \n",
3025 "21 0 0 0 0 \n",
3026 "22 14 0 0 0 \n",
3027 "23 0 0 0 0 \n",
3028 "24 0 0 1 0 \n",
3029 "\n",
3030 " SWC_Has_Infant Operation_Count ... M kiosk kontuar mobile online \\\n",
3031 "0 0 1 ... 0 0 0 1 0 \n",
3032 "1 0 1 ... 1 0 1 0 0 \n",
3033 "2 0 1 ... 1 0 1 0 0 \n",
3034 "3 0 1 ... 1 0 0 0 1 \n",
3035 "4 0 1 ... 0 0 0 0 1 \n",
3036 "5 0 1 ... 1 0 1 0 0 \n",
3037 "6 0 1 ... 1 0 0 1 0 \n",
3038 "7 0 1 ... 0 1 0 0 0 \n",
3039 "8 0 6 ... 0 0 1 0 0 \n",
3040 "9 0 1 ... 0 0 1 0 0 \n",
3041 "10 0 1 ... 0 0 1 0 0 \n",
3042 "11 0 1 ... 0 0 1 0 0 \n",
3043 "12 0 1 ... 0 0 0 0 1 \n",
3044 "13 0 1 ... 0 0 0 0 1 \n",
3045 "14 0 2 ... 1 0 0 1 0 \n",
3046 "15 0 1 ... 1 1 0 0 0 \n",
3047 "16 0 1 ... 0 0 0 1 0 \n",
3048 "17 0 1 ... 1 0 1 0 0 \n",
3049 "18 0 6 ... 0 0 1 0 0 \n",
3050 "19 0 1 ... 1 0 1 0 0 \n",
3051 "20 0 1 ... 1 0 1 0 0 \n",
3052 "21 0 1 ... 1 0 1 0 0 \n",
3053 "22 0 1 ... 1 1 0 0 0 \n",
3054 "23 0 3 ... 1 0 0 1 0 \n",
3055 "24 0 1 ... 1 0 0 0 1 \n",
3056 "\n",
3057 " other checkin_fly_difference C/INF M/INF F/INF \n",
3058 "0 0 1 0 0 0 \n",
3059 "1 0 0 0 0 0 \n",
3060 "2 0 0 0 0 0 \n",
3061 "3 0 1 0 0 0 \n",
3062 "4 0 1 0 0 0 \n",
3063 "5 0 0 0 0 0 \n",
3064 "6 0 1 0 0 0 \n",
3065 "7 0 0 0 0 0 \n",
3066 "8 0 0 0 0 0 \n",
3067 "9 0 0 0 0 0 \n",
3068 "10 0 0 0 0 0 \n",
3069 "11 0 0 0 0 0 \n",
3070 "12 0 1 0 0 0 \n",
3071 "13 0 1 0 0 0 \n",
3072 "14 0 1 0 0 0 \n",
3073 "15 0 0 0 0 0 \n",
3074 "16 0 0 0 0 0 \n",
3075 "17 0 0 0 0 0 \n",
3076 "18 0 0 0 0 0 \n",
3077 "19 0 0 0 0 0 \n",
3078 "20 0 0 0 0 0 \n",
3079 "21 0 1 0 0 0 \n",
3080 "22 0 0 0 0 0 \n",
3081 "23 0 0 0 0 0 \n",
3082 "24 0 1 0 0 0 \n",
3083 "\n",
3084 "[25 rows x 28 columns]"
3085 ]
3086 },
3087 "execution_count": 69,
3088 "metadata": {},
3089 "output_type": "execute_result"
3090 }
3091 ],
3092 "source": [
3093 "processed_test_df.head(25)"
3094 ]
3095 },
3096 {
3097 "cell_type": "code",
3098 "execution_count": 71,
3099 "metadata": {},
3100 "outputs": [],
3101 "source": [
3102 "### Printing to Excel ###\n",
3103 "# To export the results to an Excel file, uncomment the line below and run this cell.\n",
3104 "# processed_test_df.to_excel(\"Enter filename here\")"
3105 ]
3106 },
3107 {
3108 "cell_type": "markdown",
3109 "metadata": {},
3110 "source": [
3111 "<b> Thanks for checking out our work :) </b>"
3112 ]
3113 }
3114 ],
3115 "metadata": {
3116 "kernelspec": {
3117 "display_name": "Python 3",
3118 "language": "python",
3119 "name": "python3"
3120 },
3121 "language_info": {
3122 "codemirror_mode": {
3123 "name": "ipython",
3124 "version": 3
3125 },
3126 "file_extension": ".py",
3127 "mimetype": "text/x-python",
3128 "name": "python",
3129 "nbconvert_exporter": "python",
3130 "pygments_lexer": "ipython3",
3131 "version": "3.7.1"
3132 }
3133 },
3134 "nbformat": 4,
3135 "nbformat_minor": 2
3136}