w4Q6iggg

· 6 years ago · May 20, 2020, 07:08 AM
1-- -- Mock orgin table from Aster
2-- DROP TABLE IF EXISTS indra.citrix_2020_05_12_data;
3-- CREATE TABLE IF NOT EXISTS indra.citrix_2020_05_12_data (
4--     usuario_encript INT,
5--     dia_inicio_sesion VARCHAR,
6--     hora_inicio_sesion VARCHAR,
7--     dia_fin_2 VARCHAR,
8--     hora_fin_2 VARCHAR,
9--     minutos_duracion_sesion INT
10-- );
11-- COPY indra.citrix_2020_05_12_data
12-- FROM '/home/rmonge/Documents/AI/Sabadell/Informe_Citrix_TSO/2020_05_12_data.csv'
13-- DELIMITER ';' CSV HEADER;
14
15-- Load data into temporary table
16DROP TABLE IF EXISTS indra.temp;
17CREATE TABLE IF NOT EXISTS indra.temp AS
18SELECT
19    usuario_encript AS user,
20    dia_inicio_sesion AS day_start,
21    hora_inicio_sesion AS hour_start,
22    dia_fin_2 AS day_end,
23    hora_fin_2 AS hour_end
24FROM indra.citrix_2020_05_12_data;
25
26-- Filter min/max dates
27DELETE FROM indra.temp
28WHERE to_date(day_start, 'DD/MM/YYYY') < '2020/05/04'
29    AND to_date(day_start, 'DD/MM/YYYY') > '2020/05/11';
30
31-- Split start-end records keeping id to join later
32DROP TABLE IF EXISTS indra.temp1;
33CREATE TABLE IF NOT EXISTS indra.temp1 AS
34SELECT
35    temp.user,
36    TO_TIMESTAMP(day_start || ' ' || hour_start, 'DD/MM/YYYY HH24:MI:SS') AS date,
37    1 AS type
38FROM indra.temp
39UNION
40SELECT
41    temp.user,
42    TO_TIMESTAMP(day_end || ' ' || hour_end, 'DD/MM/YYYY HH24:MI:SS') AS date,
43    -1 AS type
44FROM indra.temp;
45
46-- Filter inconsistent start-end pairs (new start before old end, both removed)
47DROP TABLE IF EXISTS indra.temp2;
48CREATE TABLE IF NOT EXISTS indra.temp2 AS
49SELECT *
50FROM (
51    SELECT
52        temp1.user,
53        date,
54        type,
55        SUM(type) OVER (PARTITION BY temp1.user ORDER BY date) AS acc
56    FROM indra.temp1
57    ORDER BY temp1.user, date
58) t
59WHERE (t.type = 1 AND t.acc = 1 ) OR (t.type = -1 AND t.acc = 0);
60
61-- Join consistent start-end pairs
62-- Extract hours and start day
63DROP TABLE IF EXISTS indra.temp3;
64CREATE TABLE IF NOT EXISTS indra.temp3 AS
65SELECT
66    t.user,
67    t.start,
68    t.end,
69    EXTRACT(epoch FROM (t.end - t.start))/3600 AS hours,
70    t.start::date AS day_start
71FROM (
72    SELECT
73        temp2.user,
74        date AS start,
75        type,
76        LEAD(date, 1) OVER (PARTITION BY temp2.user ORDER BY date) AS end
77    FROM indra.temp2
78) t
79WHERE t.type = 1;
80
81-- Aggregated measures per day_start
82DROP TABLE IF EXISTS indra.temp4;
83CREATE TABLE IF NOT EXISTS indra.temp4 AS
84SELECT
85    t.user, day_start,
86    COUNT(hours) AS sessions,
87    SUM(hours) AS hours,
88    MIN(start) AS start,
89    MAX(t.end) AS end,
90    EXTRACT(epoch FROM (MAX(t.end) - MIN(start)))/3600 - SUM(hours) AS waiting,
91    SUM(hours)/COUNT(hours) AS hours_x_sess
92FROM indra.temp3 t
93GROUP BY t.user, day_start
94ORDER BY t.user, day_start;
95
96-- Build final table with aggregated measures
97-- Adding total sessions from original time records
98DROP TABLE IF EXISTS indra.final_table;
99CREATE TABLE IF NOT EXISTS indra.final_table AS
100SELECT
101    h.user AS usuario_encript,
102    h.day_start AS dia_inicio_sesion,
103    h.sessions AS sesiones,
104    h.hours AS horas,
105    h.start AS inicio,
106    h.end AS final,
107    h.waiting AS descansos,
108    h.hours_x_sess AS horas_total_jornada,
109    s.total_sessions AS total_sesiones
110FROM indra.temp4 h
111INNER JOIN (
112    SELECT
113        temp.user,
114        to_date(day_start, 'DD/MM/YYYY') AS day_start,
115        COUNT(1) AS total_sessions
116    FROM indra.temp
117    GROUP BY temp.user, day_start
118    ORDER BY temp.user, day_start
119) s ON h.user = s.user AND h.day_start = s.day_start;
120
121-- Get final table duplicates
122SELECT CASE WHEN SUM(duplicates) IS NULL THEN 0 ELSE SUM(duplicates) END as duplicates
123FROM (
124    SELECT COUNT(*)-1 AS duplicates
125    FROM indra.final_table
126    GROUP BY usuario_encript, dia_inicio_sesion
127) t
128WHERE duplicates > 0