SELECT DISTINCT(interaction) FROM zbozi16; SELECT DISTINCT(sourcecategory) FROM zbozi16; SELECT DISTINCT(service) FROM sluzby16; SELECT * from sluzby16 WHERE cookieid in (SELECT cookieid from (SELECT distinct(cookieid) from sluzby16) AS cookieid WHERE 0.01 >= random()); SELECT DISTINCT(sourcecategory) FROM zbozi17 UNION SELECT DISTINCT(sourcecategory) FROM zbozi16 ORDER BY (sourcecategory) DESC; SELECT count(*) FROM zbozi16 WHERE sourcecategory is NULL LIMIT 100; SELECT count(*),sourcecategory FROM zbozi16 GROUP BY (sourcecategory); /*number of interactions by traffic source */ SELECT COUNT(DISTINCT(cookieid)) from sluzby16; /* 34104 */ SELECT COUNT(DISTINCT(cookieid)) from sluzby17; /* 30356 */ SELECT COUNT(*) FROM (SELECT DISTINCT(cookieid) from sluzby17 UNION SELECT DISTINCT(cookieid) from sluzby16) AS t; /* 59570 */ SELECT COUNT(*) FROM (SELECT DISTINCT(cookieid) from sluzby17 UNION SELECT DISTINCT(cookieid) from zbozi17) AS t; /* */ SELECT COUNT(DISTINCT(cookieid)) from zbozi16; /* 314346 */ SELECT COUNT(DISTINCT(cookieid)) from zbozi16 WHERE interaction='click-to-shop'; /* 133474 */ SELECT COUNT(DISTINCT(cookieid)) from zbozi17; /* 399206 */ SELECT COUNT(DISTINCT(cookieid)) from zbozi17 WHERE interaction='click-to-shop'; /* 166449 */ SELECT COUNT(DISTINCT(cookieid)) FROM zbozi17 WHERE interaction='click-to-shop'; /* 133474 */ SELECT COUNT(*) FROM (SELECT cookieid FROM zbozi16 GROUP BY (cookieid) HAVING SUM(CASE WHEN interaction='click-to-shop' THEN 1 END)>0) AS t; /* THE SAME AS THE PREVIOUS QUERY, BUT HERE WE CAN DEFINE HOW MANY CLICKS DO WE WANT */ SELECT COUNT(cookieid) FROM zbozi16 GROUP BY (cookieid) HAVING COUNT(interaction='click-to-shop')>=0; SELECT * FROM zbozi16 WHERE counter in(1,2,3,4,5) ORDER BY (cookieid, datetime) LIMIT 1000; SELECT * FROM zbozi16 WHERE sourcecategory='PPC/Facebook' LIMIT 1000; SELECT COUNT(*), sourcecategory FROM zbozi16 GROUP BY(sourcecategory) ORDER BY (count); SELECT COUNT(*), SUM(fee), ROUND(CAST(SUM(fee)/COUNT(*)/100 as numeric), 1) avgclickfeeczk, sourcecategory FROM zbozi16 WHERE interaction='click-to-shop' GROUP BY(sourcecategory) ORDER BY (count); SELECT SUM(fee)/SUM(CASE WHEN interaction='click-to-shop' THEN 1 END) FROM zbozi16; /* Average CPC */ SELECT AVG(max) FROM (SELECT MAX(counter) AS max FROM zbozi16 GROUP BY (cookieid)) AS t; /* Average number of visits per cookie = 1.4481378266052386 */ SELECT AVG(durtoconv) FROM ( SELECT MAX(datetime) - MIN(datetime) AS durtoconv FROM zbozi16 WHERE cookieid IN ( SELECT cookieid FROM zbozi16 WHERE interaction='click-to-shop' ) GROUP BY (cookieid, counter) ) AS t; /* THIS IS NOT CORRECT Average duration to conversion for those who converted = 9 days 15:04:51.574455 */ SELECT MIN(datetime) FROM zbozi16 WHERE cookieid IN ( SELECT cookieid FROM zbozi16 WHERE interaction='click-to-shop' ) AND interaction='impress' GROUP BY (cookieid, counter, interaction); SELECT CAST(clicks AS float)/CAST(impressions AS float)*100 FROM (SELECT SUM(CASE WHEN interaction='impress' THEN 1 END) AS impressions, SUM(CASE WHEN interaction='click-to-shop' THEN 1 END) AS clicks FROM zbozi17) AS t; /* Average click through rate = 0.7707551982090216 */ SELECT * FROM sluzby16 LIMIT 1000; /* Sample of sluzby16 table */ SELECT DISTINCT(service) FROM sluzby16; /* Services of Seznam.cz in table sluzby16 */ SELECT DISTINCT(interaction) FROM sluzby16; /* Possible interaction types in table sluzby16 */ SELECT COUNT(*) FROM sluzby16 WHERE href IS NOT NULL AND interaction='impress'; /* Is href column full just in the case of mousedown interaction = YES (0) */ SELECT query FROM sluzby16 WHERE service='fulltext' LIMIT 100; SELECT COUNT(DISTINCT(cookieid)) FROM sluzby16 WHERE interaction='mousedown'; /* Number of cookies performed at least on click on advertisement = 23471 */ SELECT SUM(CASE WHEN interaction='impress' THEN 1 END) AS impressions, SUM(CASE WHEN interaction='mousedown' THEN 1 END) AS clicks FROM sluzby17; /* Number of impressions and clicks */ SELECT CAST(clicks AS float)/CAST(impressions AS float)*100 FROM (SELECT SUM(CASE WHEN interaction='impress' THEN 1 END) AS impressions, SUM(CASE WHEN interaction='mousedown' THEN 1 END) AS clicks FROM sluzby16) AS t; /* Average click through rate = 0.7707551982090216 */ SELECT COUNT(DISTINCT(cookieid)) FROM sluzby16 WHERE cookieid IN ( SELECT DISTINCT(cookieid) FROM zbozi16 ); /* Count of cookies from zbozi16 present in sluzby16 */ SELECT * FROM sluzby16 WHERE cookieid IN ( SELECT DISTINCT(cookieid) FROM sluzby16 WHERE cookieid NOT IN ( SELECT DISTINCT(cookieid) FROM zbozi16 ) ); /* This supposed to select the rows in sluzby16, for those cookies which are in sluzby16 dataset, but not in zbozi16 dataset, even though there are some mousedown interactions (mousedown != user loaded that page) */ */ SELECT * FROM sluzby16 LEFT JOIN zbozi16 ON (sluzby16.cookieid = zbozi16.cookieid) WHERE zbozi16.url IS NULL LIMIT 10; SELECT * FROM ( SELECT cookieid, datetime, interaction, null as counter, null as sourcecategory, null as refferer, service FROM sluzby16 UNION SELECT cookieid, datetime, interaction, counter, sourcecategory, referrer, null FROM zbozi16 )AS t ORDER BY (cookieid,datetime,service) LIMIT 10000; DROP TABLE mergedinter16; CREATE TABLE mergedinter16 (id SERIAL PRIMARY KEY, cookieid bigint, datetime timestamp, interaction char(15), sourcecategory varchar(46), fee int, service char(30), advertisement BOOLEAN); INSERT INTO mergedinter16 (cookieid, datetime, interaction, sourcecategory, fee, advertisement) SELECT cookieid, datetime, interaction, sourcecategory, fee, FALSE from zbozi16; INSERT INTO mergedinter16 (cookieid, datetime, interaction, sourcecategory, advertisement) SELECT cookieid, datetime, interaction, service, TRUE from sluzby16; UPDATE mergedinter16 SET interaction='hit' WHERE interaction='impress' AND advertisement=FALSE; SELECT * FROM mergedinter16 ORDER BY (datetime) LIMIT 1000; SELECT DISTINCT(cookieid) FROM mergedinter16; SELECT * FROM mergedinter16 WHERE sourcecategory='fulltext' AND interaction='mousedown' ORDER BY (datetime); SELECT * FROM mergedinter16 WHERE cookieid=-1012735486309528457 ORDER BY (datetime); /* -1534855132894165495; -1534704307188132011; -1535237201240929623 cookieids with Hinty/PI sourcecategory */ SELECT * FROM mergedinter16 WHERE cookieid=-10116917305845930 ORDER BY (datetime); /* cookie id with impressions and hits */ WITH clickers AS (SELECT DISTINCT(cookieid) AS cookieid FROM mergedinter16 WHERE interaction='click-to-shop') SELECT f.cookieid, f.firstint, c.convertint, c.convertint-f.firstint AS diff FROM (SELECT cookieid, MIN(datetime) AS firstint FROM mergedinter16 WHERE cookieid IN (SELECT cookieid FROM clickers) GROUP BY (cookieid)) f LEFT JOIN (SELECT cookieid, MIN(datetime) AS convertint FROM mergedinter16 WHERE cookieid IN (SELECT cookieid FROM clickers) AND interaction='click-to-shop' GROUP BY (cookieid)) c ON f.cookieid=c.cookieid; /* time lag from first interaction to first conversion, 90% of conversions happen within 16,1 days; 95% within 27,3 days; 99% within 45,2 days */ UPDATE mergedinter16 SET sourcecategory = CASE WHEN sourcecategory='Seznamácké weby/Sbazar' THEN 'sbazar' WHEN sourcecategory='Seznamácké weby/Proženy' THEN 'prozeny' WHEN sourcecategory='Seznamácké weby/Obrázky' THEN 'sobrazky' WHEN sourcecategory='Seznamácké weby/Novinky' THEN 'novinky' WHEN sourcecategory LIKE '%Seznamácké weby/Homepage%' THEN 'hp' WHEN sourcecategory='Hinty/SERP' THEN 'fulltext' END WHERE sourcecategory LIKE '%Seznamácké weby/Homepage%' OR sourcecategory='Hinty/SERP'; /* prepared query to replace values in sourcecategory to match those from sluzby table */ SELECT * FROM (SELECT *, lead(interaction) OVER (PARTITION BY cookieid ORDER BY datetime) AS leadinteraction, lead(sourcecategory) OVER (PARTITION BY cookieid ORDER BY datetime) AS leadsource FROM (SELECT * FROM mergedinter16 WHERE cookieid=-10116917305845930 ORDER BY (datetime)) AS t1) AS t2 WHERE (sourcecategory<>leadsource OR interaction<>leadinteraction); /* compress multiple interactions into the last of the group - this is example for only one user */ SELECT * FROM (SELECT *, lead(interaction) OVER (PARTITION BY cookieid ORDER BY datetime) AS leadinteraction, lead(sourcecategory) OVER (PARTITION BY cookieid ORDER BY datetime) AS leadsource FROM mergedinter16) AS t2 WHERE (sourcecategory<>leadsource OR interaction<>leadinteraction) ORDER BY (cookieid, datetime); /* compress multiple interactions into the last of the group */ SELECT * INTO subselected FROM (SELECT *, lead(interaction) OVER (PARTITION BY cookieid ORDER BY datetime) AS leadinteraction, lead(sourcecategory) OVER (PARTITION BY cookieid ORDER BY datetime) AS leadsource FROM mergedinter16) AS t2 WHERE (sourcecategory<>leadsource OR interaction<>leadinteraction) ORDER BY (cookieid, datetime); SELECT * FROM subselected LIMIT 1000000; SELECT DISTINCT(cookieid) FROM subselected LIMIT 4; SELECT * FROM subselected WHERE cookieid =-147761903125006743 ORDER BY (datetime); SELECT COUNT(datetime),sourcecategory,interaction FROM mergedinter16 GROUP BY sourcecategory,interaction; SELECT * FROM mergedinter16 ORDER BY cookieid LIMIT 1000; SELECT * FROM mergedinter16 WHERE cookieid IN (-1194835621172803577, -10116917305845930, -1445291641137182521, -1400592681375725822, -1105703701199164475, -127604696934318107, -1524022788103706407, -1126222800676078435, -1177996453961496594, -1088152517013578546) ORDER BY cookieid,datetime;