本文内容是多年前观看一个英文学习视频作得记录,相关视频找不到了,发布在这里和学习者分享,也方便自已查看。
--SELECT * from payment;
--
SELECT p.*,c.*
--FROM payment p LEFT JOIN customer c ON p.customer_id = c.customer_id
FROM payment p JOIN customer c ON p.customer_id = c.customer_id;
-- 上述两种JOIN的结果,14596条记录受影响
-- Rows in payament and i did not have a customer
-- INNER JOIN only returns rows that matches in both tables;
-- *****
SELECT p.*,c.*,r.*
FROM payment p
LEFT JOIN customer c ON p.customer_id = c.customer_id
LEFT JOIN rental r ON p.rental_id = r.rental_id;
02 Basics of SQL Joins using DVD Rental Database
-- LEFT JOIN,INNER JOINS
select generate_series(1, 10);
-- column with 1 - 10
SELECT val.val, set.set FROM generate_series(1,10)val
join generate_series(5,15)set ON val.val = set.set;
SELECT val.val, set.set FROM generate_series(1,10)val
LEFT JOIN generate_series(5,15)set on val.val = set.set; --keeps the columns in the first side
SELECT val.val, set.set FROM generate_series(1,10)val
Right join generate_series(5,15)set ON val.val = set.set; -- keeps the on the right side
-- column with 5 - 15
/* LEFT JOIN returns matches from the left table and those found on the right side of the join*/
--LEFT JOIN generate_series(5,16)set on val.val = set.set --keeps the columns in the first side
/* RIGHT JOIN returns matches from the right table and those found on the left side of the join */
--Right join generate_series(5,15)set ON val.val = set.set -- keeps the on the right side
-- 以上向左结合查询和向右结合查询的结果不同
03 Intro To Date Parsing and Time Series Data in SQL
SELECT p.customer_id, c.first_name,
SUM(p.amount)::money, COUNT(*),
SUM(p.amount)/COUNT(*) as AVG_RENTAL_VALUE
FROM payment p LEFT JOIN customer c ON p.customer_id = c.customer_id
WHERE p.customer_id > 300 AND c.first_name like '%b%'
GROUP BY 1,2
HAVING COUNT(*) > 20
--having is when you filter on aggregates, is a where clause for aggregates
ORDER BY 4 DESC;
SELECT p.*, payment_date::date,
extract(year from p.payment_date),
extract(month from p.payment_date),
extract(day from p.payment_date)
FROM payment p
where extract(month from p.payment_date) in (1,2)
-- 0419
select p.payment_date, p.amount from payment p
select to_char(p.payment_date,'yyyy-mm'),SUM(p.amount),MAX(p.amount),MIN(p.amount)
from payment p group by 1
-- -- 按照月份合计,by 1 指查询结果的第一列
select SUM(p.amount),MAX(p.amount),MIN(p.amount), to_char(p.payment_date,'yyyy-mm')
from payment p
group by 4 order by 1
04 SQL – Get Day of Week With extract() + CASE statement
-- what day of the week does the most revenue? to_chat(date,'dy')
-- 一周中获利最大的是哪一天?
select extract(dow from p.payment_date) from payment p; -- 星期的第几天,sunday is zero
---
select to_char(p.payment_date,'dy') as dow,sum(p.amount)
from payment p
group by 1
order by 2 desc;
----
select * from (
select t.*,
CASE
WHEN t.dow = 0 THEN 'Sunday'
WHEN t.dow = 1 THEN 'Monday'
WHEN t.dow = 2 THEN 'Tuesday'
WHEN t.dow = 3 THEN 'Wednesday'
WHEN t.dow = 4 THEN 'Thurs'
WHEN t.dow = 5 THEN 'Fri'
WHEN t.dow = 6 THEN 'Saturday'
END as day_of_week
from(
select extract(dow from p.payment_date) as dow,sum(p.amount)::money
--::money 表示货币数据, as dow 字段标识为dow,替代payment_date
from payment p
group by 1
order by 1
)t
)t2
where t2.day_of_week = 'Fri' or t2.day_of_week = 'Thurs';
05 More Date and Time Analysis Using SQL
-- 一周内各天的合计
select p.*, to_char(p.payment_date,'dy') as dow -- dy 表示星期几的英文缩写
from payment p;
select to_char(p.payment_date,'dy') as dow,sum(p.amount)
from payment p
group by 1
order by 2 desc
limit 3;
06 SQL – Finding Top Grossing Genres Within Each Rating
-- top grossing films per genre, within each rating
-- 各种题材的影片观看数最高的,
WITH base_table AS (
select p.rental_id,p.amount,r.inventory_id,f.title,f.rating,c.name as genre
from payment p
left join rental r on p.rental_id = r.rental_id
left join inventory i on i.inventory_id = r.inventory_id
left join film f on f.film_id = i.film_id
left join film_category fc on fc.film_id = i.film_id
left join category c on c.category_id = fc.category_id
)
SELECT * from (
select t.*,
ROW_NUMBER() over (partition by t.rating order by t.sales desc)
from(
select bt.rating, bt.genre, SUM(bt.amount) as sales
from base_table bt
group by 1,2
order by 3 desc
)t
)radical
where radical.row_number = 1;
07 Handling Missing Dates in SQL
-- 处理缺失的日期
-- dealing with fragmented data (do not have data for each day in the range)
-- drilling the difference again between INNER and LEFT JOINs :)
-- using the payment table from the dvd rental database
-- 生成日期序列
select gs::date from generate_series(
'2007-01-01',
'2007-12-31',
interval '1 day'
) gs
-- 查询租赁日合计为空的记录
With base_table as (
--上句为后套入
select gs::date,SUM(p.amount) as sales from generate_series(
'2007-01-01',
'2007-12-31',
interval '1 day'
) gs LEFT JOIN payment p on gs=p.payment_date::date
group by 1
order by 1
)
--下句为后套入
select extract(week from bt.gs),SUM(bt.sales) --按照星期合计查询
from base_table bt
group by 1
order by 1
08 Caution – Generate Series Illustrating Left vs Inner Joins and Dangers of Not Understanding Them
With base_table as (
--上句为后套入
select gs::date,SUM(p.amount) as sales from generate_series(
'2007-01-01',
'2007-12-31',
interval '1 day'
) gs LEFT JOIN payment p on gs=p.payment_date::date --去掉Left,则查询sum数据不为0的记录
group by 1
order by 1
)
--下句为后套入
select extract(week from bt.gs),SUM(bt.sales) --按照星期合计查询
from base_table bt
group by 1
order by 1
09 Filtering your database using an external data source
-- 用外部数据源过滤数据库
-- get earliest order date, correlated subquery!
-- get most recent order date, correlated subquery
-- days since most recent order, use current_date a bit
select
case
when t.country = 'United States' THEN 'USA'
else 'Rest of world' END as geo_type,
count(*) from (
select c.customer_id,c.first_name,c.last_name,c.email,c.address_id,
split_part(c.email,'@',2),c.last_name ||', '|| c.first_name as last_first,ci.city,
ci.country_id, co.country,
-- outer table, c.customer_id, inner table p.customer_id
(
select min(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as cust_min_order_date,
(
select max(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as cust_max_order_date,
current_date - (
select max(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as days_since_recent_order
From customer c
left join address a on a.address_id = c.address_id
left join city ci on ci.city_id = a.city_id
left join country co on ci.country_id = co.country_id
--say you have 10 email from a tradeshow and you want to check whethre or
--not they exist in your customer table?
)t
group by 1 order by 2 desc;
-------------------
select
case
when t.country = 'United States' THEN 'USA'
else 'Rest of world' END as geo_type,
count(*) from (
select c.customer_id,c.first_name,c.last_name,c.email,c.address_id,
split_part(c.email,'@',2),c.last_name ||', '|| c.first_name as last_first,ci.city,
ci.country_id, co.country,
-- outer table, c.customer_id, inner table p.customer_id
(
select min(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as cust_min_order_date,
(
select max(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as cust_max_order_date,
current_date - (
select max(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as days_since_recent_order
From customer c
left join address a on a.address_id = c.address_id
left join city ci on ci.city_id = a.city_id
left join country co on ci.country_id = co.country_id
--Inner Join on Outside of the DB data
where c.email IN (
'sonia.gregory@sakilacustomer.org',
'miriam.mckinney@sakilacustomer.org',
'velma.lucas@sakilacustomer.org',
'becky.miles@sakilacustomer.org',
'bobbie.craig@sakilacustomer.org',
'violet.rodriquez@sakilacustomer.org',
'kristina.chambers@sakilacustomer.org',
'toni.holt@sakilacustomer.org',
'misty.lambert@sakilacustomer.org',
'mae.fletcher@sakilacustomer.org',
'shelly.watts@sakilacustomer.org',
'daisy.bates@sakilacustomer.org',
'ramona.hale@sakilacustomer.org'
)
)t
group by 1 order by 2 desc
10 Using postgres regexp_replace to clean text data
-- 使用 regexp_replace 清洗文本数据
-- Regular Expressions (clearning data)
-- Remove stop words
-- Lowercase all text
-- Trim text
-- then count by first letter of first name in title
-- remove of, a, in
-- word boundary
Select f.description,
--regexp_replace(f.description,'of|a|in','xxx','g')
--regexp_replace(f.description,'of|\ma\M|in','xxx','g')
--regexp_replace(f.description,'\mof\M|\ma\M|\min\M','xxx','g')
--regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g')
--regexp_replace(f.description,'\mof\M|\ma\M|\min\M|\s\s+','','g')
lower(
regexp_replace(
regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),
'\s+',' ','g')),
--left(lower(
-- regexp_replace(
-- regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),
-- '\s+',' ','g')),3)
trim(lower(
regexp_replace(
regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),
'\s+',' ','g')))
from film f;
11 Postgres Generate Series and Inner vs Left Joins
-- 生成序列数据,左联合vs右联合
select * from(
select date::date, sum(p.amount), count(*) from generate_series(
'2007-01-01'::date,
'2007-12-31'::date,
'1 day'::interval
)date left join payment p on p.payment_date::date = date.date
--测试去掉left 或使用INNER 的查询结果
group by 1
order by 1
-- nulls last -- 查询sum为null的记录
)t
where t.count > 1
-- without left join, you reduce the result to only matching cases on either
-- side of the join in this case, that means we needed sales on the date...
12 Array Operations in Postgres – Grouping to An Array
-- 数组操作 - 数组分组
select f.film_id,count(*)
from film f
left join film_actor fa on f.film_id = fa.film_id
group by 1
order by 1;
/**/
select f.film_id,fa.*
from film f
left join film_actor fa on f.film_id = fa.film_id
order by 1,2;
/*****/
select f.film_id,count(*),
array_agg(fa.actor_id),
(array_agg(fa.actor_id))[1:3], -- 数组中为前3个演员的ID
(select sum(x) from unnest(array_agg(fa.actor_id)) x)
from film f
left join film_actor fa on f.film_id = fa.film_id
group by 1
order by 1,2
13 Finding Customers by Unique Genres Rented
-- 查找租借单一题材的客户
-- find customers who have rented from >=10 genres
-- you can select from any select, everything is a table!!!
select t2.customer_id,count(distinct t2.genre) from (
select t.customer_id,t.genre,count(*) from(
select r.rental_id,r.customer_id,r.inventory_id,f.title,fc.category_id,c.name as genre
from rental r
left join inventory i on r.inventory_id = i.inventory_id
left join film f on i.film_id = f.film_id
left join film_category fc on fc.film_id = f.film_id
left join category c on c.category_id = fc.category_id
)t
group by 1,2
order by 1,3 desc
)t2
group by 1
having count(*) > 10
order by 2;
14 SQL For Actors with the Most Films
-- 查询参演最多电影的演员
-- which film has the most actors? --哪支股票有最多的基金参与?
-- everything is a table, everything is table
-- find the actor with the most films?
-- actor_id, nbr of films they are in
select t.actor_id, count(distinct t.film_id) from (
select f.film_id,f.title,f.release_year,fa.actor_id,a.first_name,a.last_name
from film f
left join film_actor fa on f.film_id = fa.film_id
left join actor a on fa.actor_id = a.actor_id
order by 1,4
)t
group by 1
having count(distinct t.film_id) > 34
order by 2 desc;
15 Postgres – Top Movies by Genre Window Functions and CTEs
-- 按照题材WF和CTE列出最受欢迎的电影
-- Joins Joins Joins!!! what are the thop selling movies?
-- payment --> rental(inventory) --> inventory to find more about the movie?
-- Top selling movie per genre
-- 各种题材电影销量最好的(各个板块中涨幅最大的/涨幅最小且持续时间最长的)
-- common table expression
with base_table as (
select p.rental_id, p.amount,r.inventory_id,i.film_id,f.title,f.rating
from payment p
left join rental r on p.rental_id = r.rental_id
left join inventory i on i.inventory_id = r.inventory_id
left join film f on f.film_id = i.film_id
),next_table as(
select bt.title,bt.rating, sum(bt.amount)
from base_table bt
group by 1,2
order by 3 desc
)
select * from(
select nt.*,
row_number() over (partition by nt.rating order by nt.sum desc) as rating_rank
from next_table nt
)t
where t.rating_rank = 1;
16 SQL – everything is a table, some more joining basics
-- 所有的事情都是数据表,更多的一些联合基本用法
-- more grouping , counting, etc....
select t.film_id, count(*) from(
select f.film_id, f.title, f.release_year,fa.actor_id
from film f
left join film_actor fa on f.film_id = fa.film_id
-- in film, film_id is the primary key
order by 1,4
--which film has the most actors?\
--everything is a table
)t
group by 1
order by 2 desc;
/* ------------ */
select f.film_id, f.title, f.release_year,fa.actor_id,a.first_name,a.last_name
from film f
left join film_actor fa on f.film_id = fa.film_id
left join actor a on fa.actor_id = a.actor_id
order by 1,4
17 SQL – Find New Customers by Date of Acquisition
-- 按照购买日期查找新的客户
select * from (
select t.*,
CASE
WHEN t.dow = 0 THEN 'Sunday'
WHEN t.dow = 1 THEN 'Monday'
WHEN t.dow = 2 THEN 'Tuesday'
WHEN t.dow = 3 THEN 'Wednesday'
WHEN t.dow = 4 THEN 'Thurs'
WHEN t.dow = 5 THEN 'Fri'
WHEN t.dow = 6 THEN 'Saturday'
END as day_of_week
from(
select extract(dow from p.payment_date) as dow,sum(p.amount)::money
--::money 表示货币数据, as dow 字段标识为dow,替代payment_date
from payment p
group by 1
order by 1
)t
)t2
where t2.day_of_week = 'Fri' or t2.day_of_week = 'Thurs';
------
with first_payments as (
select p.customer_id,min(p.payment_date)::date
-- ::date 一定要有,否则查询结果就不是想要的结果
from payment p
group by 1
order by 2,1
)
-- new customers by date (common table expression)
select * from first_payments;
18 Abstract Intro the ROW_NUMBER Window Function Using Random Data
-- 日期属于哪个季度; 销售业绩最好的前5天,是几月几日
select date::date, extract(quarter from date) from generate_series(
'2007-01-01'::date,
'2007-12-31'::date,
'1 day'::interval)date
----
select * from (
select t.*,
row_number() over(partition by t.qtr order by t.sales desc) as day_rank
from (
select date::date, extract(quarter from date) as
qtr,floor(random()*10000)::numeric::money as sales
from generate_series(
'2007-01-01'::date,
'2007-12-31'::date,
'1 day'::interval)date
)t
-- top 5 performing days (sales_per quarte -- what are the dates?)
)t2
where t2.day_rank in (1,2,3,4,5) -- 每个季度销售业绩最好的5天
19 Simulate ROW_NUMBER() Without Window Functions
select * from(
select p.payment_id, p.customer_id,
-- correlated subquery:
-- inner query below is counting the nbr of times that the inner payment id <= outer pmt ID
-- inner always matches OUTER customer id, 以下内查询时间的编号,时间的符合条件为....
(
select count(*) from payment p2
where p2.payment_id <= p.payment_id and p2.customer_id = p.customer_id
) as row_nbr
from payment p
order by 2,1
)t where t.row_nbr =1;
20 Postgres – Use Row_Number to Find Customer First Orders
-- 找到客户的第一次下单
-- solve it uses row_number
select * from(
select p.payment_id, p.customer_id, p.payment_date,
Row_number() over(partition by p.customer_id order by p.payment_id asc) as cust_order_rank,
Row_number() over(partition by p.customer_id order by p.payment_date asc) as cust_order_rank_date
from payment p
order by 2,3
)t
where t.cust_order_rank_date = 1;
21 First Customer Order With Subquery But No Common Table Expression
-- 用子查询找到客户的第一个单子
-- more customer value: getting first orders and other data about it
-- payment, p.payment_id goes up over time, its a primary key
select p.customer_id,p.amount, min(p.payment_id) as first_pmt_id
from payment p
group by 1,2
order by 1
/*-------------------*/
select p.customer_id, min(p.payment_id) as first_pmt_id,
-- subquery to join the outerrows to this inner row
(
select p2.amount from payment p2
where p2.payment_id = min(p.payment_id)
) as first_pmt_amt,
(
select p2.rental_id from payment p2
where p2.payment_id = min(p.payment_id)
) as first_rnt_id
from payment p
group by 1
order by 1;
22 Using a Common Table Expression To Find Customer 1st Order Date
-- CTE (common table expression) -- 找到客户第一次下单的时间
With first_orders as (
select p.customer_id, min(p.payment_id) as first_pmt
from payment p
group by 1
)
-- select * from first_orders
-- order by 1
select * from payment p2
where p2.payment_id in (
select fo.first_pmt from first_orders fo
)
order by 2;
23 SQL – Break out Customer Orders by New vs Repeat
-- 把客户的首次下单和再次下单分出来
-- get the customer's first order date and the amount they spent
-- 得到客户首次下单的时间和话费的金额
select t.payment_date::date,
case
when t.row_number =1 then 'New_buyer'
else 'repeat_buyer'
end as order_type,
sum(t.amount)
from (
select
p.customer_id,
p.payment_date,
Row_number() over (partition by p.customer_id),
amount
from payment p
)t
group by 1,2
order by 1
-- order can be first order, or a prpeat order(if row_number =1, new else repeat)
-- date, rev for 1st time, rev for repeat
24 SQL – Do Customers Spend More On 1st or 2nd Order
-- 客户的第二次下单花费多或少
-- do they spend more or less on their second order?
/*----------------------------*/
with base_table as(
select * from (
select p.payment_id, p.customer_id, p.payment_date, p.amount,
row_number() over(partition by p.customer_id order by p.payment_date asc)
as cust_order_rank,
LAG(p.payment_date) over (partition by p.customer_id
order by p.payment_date asc) as prior_order,
p.payment_date -lag(p.payment_date) over (partition by p.customer_id
order by p.payment_date asc) as time_since,
floor(extract(epoch from p.payment_date - lag(p.payment_date)
over(partition by p.customer_id order by p.payment_date asc))/60)
as mins_since_prior_order
from payment p
order by 2,3
)t where t.cust_order_rank IN (1,2)
)
-- which order nbr of 1 or 2, has more money
select bt.cust_order_rank, sum(bt.amount),count(*)
from base_table bt
group by 1
/*------------------------------------------------------------*/
-- every customer has at least 2 orders , order rank 2 < order rank 1
-- 至少有两个订单的客户,订单排名2 < 订单排名1
with base_table as(
select * from (
select p.payment_id, p.customer_id, p.payment_date, p.amount,
row_number() over(partition by p.customer_id order by p.payment_date asc)
as cust_order_rank,
LAG(p.payment_date) over (partition by p.customer_id
order by p.payment_date asc) as prior_order,
p.payment_date -lag(p.payment_date) over (partition by p.customer_id
order by p.payment_date asc) as time_since,
floor(extract(epoch from p.payment_date - lag(p.payment_date)
over(partition by p.customer_id order by p.payment_date asc))/60)
as mins_since_prior_order
from payment p
order by 2,3
)t where t.cust_order_rank IN (1,2,3,4,5,6,7,8,9)
)
-- which order nbr of 1 or 2, has more money
select bt.cust_order_rank, sum(bt.amount),count(*),
sum(bt.amount)/count(*)
from base_table bt
group by 1
25 The SUM(CASE WHEN pattern in SQL to compute valued columns
-- get the customer's first order date and the amount they spent
-- 获得客户的第一个订单和他们的消费总额
select t.payment_date::date,
case
when t.row_number = 1 then 'New_buyer'
else 'repeat_buyer'
end as order_type,
sum(t.amount)
from (
select
p.customer_id,
p.payment_date,
row_number() over (partition by p.customer_id),
amount
from payment p
)t
group by 1,2
order by 1
/*----------------------------------------*/
-- max rising change of price
select t.payment_date::date,
sum (case when t.row_number = 1 then t.amount else 0 end)::money as new_buyer_revenues,
sum (case when t.row_number <> 1 then t.amount else 0 end)::money as repeat_buyer_revenues,
sum (case when t.row_number > 0 then t.amount else 0 end) as all_revenues,
sum (case when t.row_number = 1 then t.amount else 0 end)/
sum (case when t.row_number > 0 then t.amount else 0 end) as pct_from_new
from (
select
p.customer_id,
p.payment_date,
row_number() over (partition by p.customer_id),
amount
from payment p
)t
group by 1
order by 1
26 A Gentle Intro to the LAG() Window Function in SQL
-- LAG 窗口函数简介
-- feel good lag today is novermber 25,2016
-- select current_date, current_date - interval '364' days as ly
-- day,measure, measure for same time ly
with sales_data as(
select date::date, floor(random()*100000)::numeric::money as sales_this_yr
from generate_series(
'2015-01-01'::date,
'2016-12-31'::date,
'1 day'::interval
)date
)
-- Nov 25, 2016 black friday
-- the comp date for LY was 11/27/15
select sd.*,LAG(sd.sales_this_yr,364) over(), current_date::date - interval '364 days'
from sales_data sd;
27 Use LAG() To find timing between the 1st and 2nd Customer Order
-- 使用lag函数,查到客户的第一单和第二单之间的间隔时间
--what's the avg delay between the first and second order???
-- 第一个订单和第二个订单之间平均相隔几天?
-- 借鉴此类方法,计算Zen的各种几何片段????
with base_table As(
select * from (
select p.payment_id,p.customer_id,p.payment_date,
row_number() over(partition by p.customer_id order by p.payment_date ASC)
as cust_order_rank,
lag(p.payment_date) over(partition by p.customer_id order by p.payment_date ASC)
as prior_order,
p.payment_date - lag(p.payment_date) over(partition by p.customer_id order by
p.payment_date asc) as time_series,
floor(extract(epoch from p.payment_date - lag(p.payment_date)
over(partition by p.customer_id order by p.payment_date asc))/60)
as mins_since_prior_order
from payment p
order by 2,3
)t where t.cust_order_rank = 2 -- IN (1,2)
-- 第二单,或第一单和第二单
)
select
case
when bt.mins_since_prior_order < 100 then '< 100'
when bt.mins_since_prior_order < 200 then '< 200'
when bt.mins_since_prior_order < 300 then '< 300'
when bt.mins_since_prior_order < 400 then '< 400'
when bt.mins_since_prior_order < 500 then '< 500'
else ' >= 500' END as time_distribution,
count(*)
from base_table bt
group by 1
order by 2 desc
28 Use Correlated Subqueries to Find First 7 and First 30 Days Revenues
-- get the first order revenue and the forward 7 days sales
-- 前7天和前30天的销售额
With base_table as (
select p.payment_id, p.customer_id, p.payment_date, p.amount,
row_number() over(partition by p.customer_id order by p.payment_id asc)
as cust_order_rank_payment
from payment p
order by 2,3
)
select bt.*,
(
select sum(p2.amount) from payment p2
where p2.payment_date between bt.payment_date and bt.payment_date + INTERVAL '7 days'
and bt.customer_id = p2.customer_id
) fwd_7_days_sales,
-- correlated subquery
(
select sum(p2.amount) from payment p2
where p2.payment_date between bt.payment_date and bt.payment_date + INTERVAL '30 days'
and bt.customer_id = p2.customer_id
) fwd_30_days_sales
from base_table bt where bt.cust_order_rank_payment = 1;
29 PostgresSQL Row Number (Hourly sales for each staff member)
-- without left join, you reduce the results to only matching cases on either side of the join
-- in this case, that means needed sales on the date...
-- which is the most popular hour for each staff member?
-- 对员工会员而言,业绩最好的时间是哪个小时?
with hourly_sales as (
select p.staff_id, extract(hour from p.payment_date) as hour_of_day,sum(p.amount)
from payment p
group by 1,2
order by 3 desc, 1 desc
)
-- what hour is the best for each staff_id?
select * from (
select hs.*,
row_number() over (partition by hs.staff_id order by hs.sum desc)
from hourly_sales hs
)t
where t.row_number = 1
30 Compute Moving Averages Using Postgres
-- select sd.*, lag(sd.sales_this_yr,364) over(), sd.date - interval '364 days'
-- from sales_date sd
-- 7 day trailing MA
with sales_data as (
select date::date, floor(random()*10000)::numeric::money as sales_this_yr
from generate_series(
'2015-01-01'::date,
'2016-12-31'::date,
'1 day'::interval
)date
)
select sd.*,
floor(avg(sd.sales_this_yr::numeric) over (order by sd.date rows between 7 preceding and 0 following))
as l7davg,
sd.sales_this_yr::numeric - floor(avg(sd.sales_this_yr::numeric) over(order by sd.date rows
between 7 preceding and 0 following)) as vsavg
from sales_data sd;
-- time diffs between order by customers
31 SQL – Multiple Row_Number functions addressing different partitions
-- what about the top day per quarter and the top day per month?
select t.*,
row_number() over(partition by t.qtr order by t.sales desc) as day_rank_qtr,
row_number() over(partition by t.month order by t.sales desc) as day_rank_mo
from (
select date::date, extract(quarter from date) as qtr, extract(month from date) as month,
floor(random()*100000)::numeric::money as sales
from generate_series(
'2007-01-01'::date,
'2007-12-31'::date,
'1 day'::interval
)date
)t
32 Postgres split_part, plus CASE stmt tutorial
-- 美国员工和美国之外的员工
-- Live coding this (start with customer table)
-- splitting on email domain, combined like domains into groups
-- concatenating text, get first, last, etc.,,,
-- Grouping based on country, more joins and then a CASE statement
-- get earliest order date, correlated subquery!
-- get most recent order date, correlated subquery
-- days since most recent order, use current_date a bit
select
case
when t.country = 'United States' then 'USA'
else 'Rest of World' end as geo_type,
count(*) from (
select c.customer_id, c.first_name, c.last_name, c.email, c.address_id,
split_part(c.email,'@',2),c.last_name||', '||c.first_name as last_first,ci.city,
ci.country_id,co.country,
-- outer table, c.customer_id, inner table p.customer_id
(
select min(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as cust_min_order_date,
(
select max(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as cust_max_order_date,
current_date - (
select max(p.payment_date)
from payment p where p.customer_id = c.customer_id
)::date as days_since_recent_order
from customer c
left join address a on a.address_id = c.address_id
left join city ci on ci.city_id = a.city_id
left join country co on ci.country_id = co.country_id
)t
group by 1 order by 2 desc;
33 SQL Row Number _ Window Function Example – Top NY Baby Names by Year
发布者:股市刺客,转载请注明出处:https://www.95sca.cn/archives/76184
站内所有文章皆来自网络转载或读者投稿,请勿用于商业用途。如有侵权、不妥之处,请联系站长并出示版权证明以便删除。敬请谅解!