· 6 years ago · May 24, 2019, 05:20 AM
1/ Create a database for easy reuse, so you don't need to read from a file every time.
2 CREATE DATABASE IF NOT EXISTS LogsDB;
3
4 // Create a Table valued function. TVF ensures that your jobs fetch data from the weblog file with the correct schema.
5 DROP FUNCTION IF EXISTS LogsDB.dbo.WeblogsView;
6 CREATE FUNCTION LogsDB.dbo.WeblogsView()
7 RETURNS @result TABLE
8 (
9
10 date string,
11 time string,
12 client_ip string,
13 username string,
14 server_ip string,
15 port int,
16 method string,
17 stem string,
18 query string,
19 status int,
20 server_bytes int,
21 client_bytes int,
22 time_taken int,
23 user_agent string,
24 referrer string
25
26 )
27 AS
28 BEGIN
29
30 @result = EXTRACT
31 date string,
32 time string,
33 client_ip string,
34 username string,
35 server_ip string,
36 port int,
37 method string,
38 stem string,
39 query string,
40 status int,
41 server_bytes int,
42 client_bytes int,
43 time_taken int,
44 user_agent string,
45 referrer string
46 FROM "/iislogs/{*}"
47 USING Extractors.Text(delimiter:' ', silent:true);
48 RETURN;
49 END;
50
51 // Create a table for storing failures and status
52 DROP TABLE IF EXISTS LogsDB.dbo.FailuresPerDay;
53 @weblog = LogsDB.dbo.WeblogsView();
54 CREATE TABLE LogsDB.dbo.FailuresPerDay
55 (
56 INDEX idx1
57 CLUSTERED(date ASC)
58 DISTRIBUTED BY HASH(date)
59 ) AS
60
61 SELECT date,
62 stem,
63 client_ip,
64 status,
65 COUNT(DISTINCT client_ip) AS cnt
66 FROM @weblog
67 GROUP BY date,
68 stem,
69 client_ip,
70 status;