· 6 years ago · May 24, 2019, 05:10 AM
1@weblog = EXTRACT
2
3 date string,
4
5 time string,
6
7 client_ip string,
8
9 username string,
10
11 server_ip string,
12
13 port int,
14
15 method string,
16
17 stem string,
18
19 query string,
20
21 status int,
22
23 server_bytes int,
24
25 client_bytes int,
26
27 time_taken int,
28
29 user_agent string,
30
31 referrer string
32
33 FROM "/iislogs/{*}"
34
35 USING Extractors.Text(delimiter:' ', silent:true);
36
37@weblog = LogsDB.dbo.WeblogsView();
38
39OUTPUT @weblog
40
41 TO @"/output/UnsuccessfulResponses.log"
42
43 USING Outputters.Tsv();
44
45
46// Create a database for easy reuse, so you don't need to read from a file every time.
47
48 CREATE DATABASE IF NOT EXISTS LogsDB;
49
50
51
52 // Create a Table valued function. TVF ensures that your jobs fetch data from the weblog file with the correct schema.
53
54 DROP FUNCTION IF EXISTS LogsDB.dbo.WeblogsView;
55
56 CREATE FUNCTION LogsDB.dbo.WeblogsView()
57
58 RETURNS @result TABLE
59
60 (
61
62
63
64 date string,
65
66 time string,
67
68 client_ip string,
69
70 username string,
71
72 server_ip string,
73
74 port int,
75
76 method string,
77
78 stem string,
79
80 query string,
81
82 status int,
83
84 server_bytes int,
85
86 client_bytes int,
87
88 time_taken int,
89
90 user_agent string,
91
92 referrer string
93
94
95
96 )
97
98 AS
99
100 BEGIN
101
102
103
104 @result = EXTRACT
105
106 date string,
107
108 time string,
109
110 client_ip string,
111
112 username string,
113
114 server_ip string,
115
116 port int,
117
118 method string,
119
120 stem string,
121
122 query string,
123
124 status int,
125
126 server_bytes int,
127
128 client_bytes int,
129
130 time_taken int,
131
132 user_agent string,
133
134 referrer string
135
136 FROM "/iislogs/{*}"
137
138 USING Extractors.Text(delimiter:' ', silent:true);
139
140 RETURN;
141
142 END;
143
144
145
146 // Create a table for storing failures and status
147
148 DROP TABLE IF EXISTS LogsDB.dbo.FailuresPerDay;
149
150 @weblog = LogsDB.dbo.WeblogsView();
151
152 CREATE TABLE LogsDB.dbo.FailuresPerDay
153
154 (
155
156 INDEX idx1
157
158 CLUSTERED(date ASC)
159
160 DISTRIBUTED BY HASH(date)
161
162 ) AS
163
164
165
166 SELECT date,
167
168 stem,
169
170 client_ip,
171
172 status,
173
174 COUNT(DISTINCT client_ip) AS cnt
175
176 FROM @weblog
177
178 GROUP BY date,
179
180 stem,
181
182 client_ip,
183
184 status;
185
186
187
188// Query the clients that ran into errors
189
190 @content =
191
192 SELECT *
193
194 FROM LogsDB.dbo.FailuresPerDay
195
196 WHERE status < 200 OR status >= 300;
197
198
199
200 OUTPUT @content
201
202 TO @"/output/UnsuccessfulResponses.log"
203
204 USING Outputters.Tsv();
205
206
207
208
209using Microsoft.Analytics.Interfaces;
210
211 using System.Collections.Generic;
212
213
214
215 namespace USQL_UDO
216
217 {
218
219 public class CountryName : IProcessor
220
221 {
222
223 private static IDictionary<string, string> CountryTranslation = new Dictionary<string, string>
224
225 {
226
227 {
228
229 "Deutschland", "Germany"
230
231 },
232
233 {
234
235 "Suisse", "Switzerland"
236
237 },
238
239 {
240
241 "UK", "United Kingdom"
242
243 },
244
245 {
246
247 "USA", "United States of America"
248
249 },
250
251 {
252
253 "ä¸å›½", "PR China"
254
255 }
256
257 };
258
259
260
261 public override IRow Process(IRow input, IUpdatableRow output)
262
263 {
264
265
266
267 string UserID = input.Get<string>("UserID");
268
269 string Name = input.Get<string>("Name");
270
271 string Address = input.Get<string>("Address");
272
273 string City = input.Get<string>("City");
274
275 string State = input.Get<string>("State");
276
277 string PostalCode = input.Get<string>("PostalCode");
278
279 string Country = input.Get<string>("Country");
280
281 string Phone = input.Get<string>("Phone");
282
283
284
285 if (CountryTranslation.Keys.Contains(Country))
286
287 {
288
289 Country = CountryTranslation[Country];
290
291 }
292
293 output.Set<string>(0, UserID);
294
295 output.Set<string>(1, Name);
296
297 output.Set<string>(2, Address);
298
299 output.Set<string>(3, City);
300
301 output.Set<string>(4, State);
302
303 output.Set<string>(5, PostalCode);
304
305 output.Set<string>(6, Country);
306
307 output.Set<string>(7, Phone);
308
309
310
311 return output.AsReadOnly();
312
313 }
314
315 }
316
317 }
318
319
320
321
322
323@drivers =
324
325 EXTRACT UserID string,
326
327 Name string,
328
329 Address string,
330
331 City string,
332
333 State string,
334
335 PostalCode string,
336
337 Country string,
338
339 Phone string
340
341 FROM "/Samples/Data/AmbulanceData/Drivers.txt"
342
343 USING Extractors.Tsv(Encoding.Unicode);
344
345
346
347 @drivers_CountryName =
348
349 PROCESS @drivers
350
351 PRODUCE UserID string,
352
353 Name string,
354
355 Address string,
356
357 City string,
358
359 State string,
360
361 PostalCode string,
362
363 Country string,
364
365 Phone string
366
367 USING new USQL_UDO.CountryName();
368
369
370
371 OUTPUT @drivers_CountryName
372
373 TO "/Samples/Outputs/Drivers.csv"
374
375 USING Outputters.Csv(Encoding.Unicode);