· 5 years ago · May 20, 2020, 07:08 AM
1-- -- Mock orgin table from Aster
2-- DROP TABLE IF EXISTS indra.citrix_2020_05_12_data;
3-- CREATE TABLE IF NOT EXISTS indra.citrix_2020_05_12_data (
4-- usuario_encript INT,
5-- dia_inicio_sesion VARCHAR,
6-- hora_inicio_sesion VARCHAR,
7-- dia_fin_2 VARCHAR,
8-- hora_fin_2 VARCHAR,
9-- minutos_duracion_sesion INT
10-- );
11-- COPY indra.citrix_2020_05_12_data
12-- FROM '/home/rmonge/Documents/AI/Sabadell/Informe_Citrix_TSO/2020_05_12_data.csv'
13-- DELIMITER ';' CSV HEADER;
14
15-- Load data into temporary table
16DROP TABLE IF EXISTS indra.temp;
17CREATE TABLE IF NOT EXISTS indra.temp AS
18SELECT
19 usuario_encript AS user,
20 dia_inicio_sesion AS day_start,
21 hora_inicio_sesion AS hour_start,
22 dia_fin_2 AS day_end,
23 hora_fin_2 AS hour_end
24FROM indra.citrix_2020_05_12_data;
25
26-- Filter min/max dates
27DELETE FROM indra.temp
28WHERE to_date(day_start, 'DD/MM/YYYY') < '2020/05/04'
29 AND to_date(day_start, 'DD/MM/YYYY') > '2020/05/11';
30
31-- Split start-end records keeping id to join later
32DROP TABLE IF EXISTS indra.temp1;
33CREATE TABLE IF NOT EXISTS indra.temp1 AS
34SELECT
35 temp.user,
36 TO_TIMESTAMP(day_start || ' ' || hour_start, 'DD/MM/YYYY HH24:MI:SS') AS date,
37 1 AS type
38FROM indra.temp
39UNION
40SELECT
41 temp.user,
42 TO_TIMESTAMP(day_end || ' ' || hour_end, 'DD/MM/YYYY HH24:MI:SS') AS date,
43 -1 AS type
44FROM indra.temp;
45
46-- Filter inconsistent start-end pairs (new start before old end, both removed)
47DROP TABLE IF EXISTS indra.temp2;
48CREATE TABLE IF NOT EXISTS indra.temp2 AS
49SELECT *
50FROM (
51 SELECT
52 temp1.user,
53 date,
54 type,
55 SUM(type) OVER (PARTITION BY temp1.user ORDER BY date) AS acc
56 FROM indra.temp1
57 ORDER BY temp1.user, date
58) t
59WHERE (t.type = 1 AND t.acc = 1 ) OR (t.type = -1 AND t.acc = 0);
60
61-- Join consistent start-end pairs
62-- Extract hours and start day
63DROP TABLE IF EXISTS indra.temp3;
64CREATE TABLE IF NOT EXISTS indra.temp3 AS
65SELECT
66 t.user,
67 t.start,
68 t.end,
69 EXTRACT(epoch FROM (t.end - t.start))/3600 AS hours,
70 t.start::date AS day_start
71FROM (
72 SELECT
73 temp2.user,
74 date AS start,
75 type,
76 LEAD(date, 1) OVER (PARTITION BY temp2.user ORDER BY date) AS end
77 FROM indra.temp2
78) t
79WHERE t.type = 1;
80
81-- Aggregated measures per day_start
82DROP TABLE IF EXISTS indra.temp4;
83CREATE TABLE IF NOT EXISTS indra.temp4 AS
84SELECT
85 t.user, day_start,
86 COUNT(hours) AS sessions,
87 SUM(hours) AS hours,
88 MIN(start) AS start,
89 MAX(t.end) AS end,
90 EXTRACT(epoch FROM (MAX(t.end) - MIN(start)))/3600 - SUM(hours) AS waiting,
91 SUM(hours)/COUNT(hours) AS hours_x_sess
92FROM indra.temp3 t
93GROUP BY t.user, day_start
94ORDER BY t.user, day_start;
95
96-- Build final table with aggregated measures
97-- Adding total sessions from original time records
98DROP TABLE IF EXISTS indra.final_table;
99CREATE TABLE IF NOT EXISTS indra.final_table AS
100SELECT
101 h.user AS usuario_encript,
102 h.day_start AS dia_inicio_sesion,
103 h.sessions AS sesiones,
104 h.hours AS horas,
105 h.start AS inicio,
106 h.end AS final,
107 h.waiting AS descansos,
108 h.hours_x_sess AS horas_total_jornada,
109 s.total_sessions AS total_sesiones
110FROM indra.temp4 h
111INNER JOIN (
112 SELECT
113 temp.user,
114 to_date(day_start, 'DD/MM/YYYY') AS day_start,
115 COUNT(1) AS total_sessions
116 FROM indra.temp
117 GROUP BY temp.user, day_start
118 ORDER BY temp.user, day_start
119) s ON h.user = s.user AND h.day_start = s.day_start;
120
121-- Get final table duplicates
122SELECT CASE WHEN SUM(duplicates) IS NULL THEN 0 ELSE SUM(duplicates) END as duplicates
123FROM (
124 SELECT COUNT(*)-1 AS duplicates
125 FROM indra.final_table
126 GROUP BY usuario_encript, dia_inicio_sesion
127) t
128WHERE duplicates > 0