PostgreSQL复杂查询示例

本文内容是多年前观看一个英文学习视频作得记录,相关视频找不到了,发布在这里和学习者分享,也方便自已查看。

--SELECT * from payment;

--
SELECT p.*,c.* 
--FROM payment p LEFT JOIN customer c ON p.customer_id = c.customer_id
FROM payment p JOIN customer c ON p.customer_id = c.customer_id;
-- 上述两种JOIN的结果,14596条记录受影响

-- Rows in payament and i did not have a customer 
-- INNER JOIN only returns rows that matches in both tables;

-- ***** 
SELECT p.*,c.*,r.* 
FROM payment p 
	LEFT JOIN customer c ON p.customer_id = c.customer_id
	LEFT JOIN rental r ON p.rental_id = r.rental_id;

02 Basics of SQL Joins using DVD Rental Database


-- LEFT JOIN,INNER JOINS

select generate_series(1, 10);

-- column with 1 - 10
SELECT val.val, set.set FROM generate_series(1,10)val
join generate_series(5,15)set ON val.val = set.set;

SELECT val.val, set.set FROM generate_series(1,10)val
LEFT JOIN generate_series(5,15)set on val.val = set.set;  --keeps the columns in the first side

SELECT val.val, set.set FROM generate_series(1,10)val
Right join generate_series(5,15)set ON val.val = set.set; -- keeps the on the right side

-- column with 5 - 15
/* LEFT JOIN returns matches from the left table and those found on the right side of the join*/
--LEFT JOIN generate_series(5,16)set on val.val = set.set  --keeps the columns in the first side

/* RIGHT JOIN returns matches from the right table and those found on the left side of the join */
--Right join generate_series(5,15)set ON val.val = set.set -- keeps the on the right side

-- 以上向左结合查询和向右结合查询的结果不同

03 Intro To Date Parsing and Time Series Data in SQL

SELECT p.customer_id, c.first_name,
SUM(p.amount)::money, COUNT(*), 
SUM(p.amount)/COUNT(*) as AVG_RENTAL_VALUE
FROM payment p LEFT JOIN customer c ON p.customer_id = c.customer_id
WHERE p.customer_id > 300 AND c.first_name like '%b%'
GROUP BY 1,2
HAVING COUNT(*) > 20 
	--having is when you filter on aggregates, is a where clause for aggregates
ORDER BY 4 DESC;


SELECT p.*, payment_date::date, 
extract(year from p.payment_date),
extract(month from p.payment_date),
extract(day from p.payment_date)
FROM payment p
where extract(month from p.payment_date) in (1,2)


-- 0419
select p.payment_date, p.amount from payment p

select to_char(p.payment_date,'yyyy-mm'),SUM(p.amount),MAX(p.amount),MIN(p.amount)
from payment p group by 1
-- -- 按照月份合计,by 1 指查询结果的第一列

select SUM(p.amount),MAX(p.amount),MIN(p.amount), to_char(p.payment_date,'yyyy-mm')
from payment p
group by 4 order by 1 

04 SQL – Get Day of Week With extract() + CASE statement

-- what day of the week does the most revenue? to_chat(date,'dy')
-- 一周中获利最大的是哪一天?

select extract(dow from p.payment_date) from payment p;  -- 星期的第几天,sunday is zero

---
select to_char(p.payment_date,'dy') as dow,sum(p.amount)
from payment p
group by 1
order by 2 desc;

----
select * from (

	select t.*,

		CASE
			WHEN t.dow = 0 THEN 'Sunday'
			WHEN t.dow = 1 THEN 'Monday'
			WHEN t.dow = 2 THEN 'Tuesday'
			WHEN t.dow = 3 THEN 'Wednesday'
			WHEN t.dow = 4 THEN 'Thurs'
			WHEN t.dow = 5 THEN 'Fri'
			WHEN t.dow = 6 THEN 'Saturday'
		END as day_of_week

	from(
			select extract(dow from p.payment_date) as dow,sum(p.amount)::money   
					--::money 表示货币数据, as dow  字段标识为dow,替代payment_date
			from payment p 
			group by 1
			order by 1
		)t
)t2

where t2.day_of_week = 'Fri' or t2.day_of_week = 'Thurs';

05 More Date and Time Analysis Using SQL

-- 一周内各天的合计
select p.*, to_char(p.payment_date,'dy') as dow  -- dy 表示星期几的英文缩写
from payment p;

select to_char(p.payment_date,'dy') as dow,sum(p.amount)
from payment p
group by 1
order by 2 desc
limit 3;

06 SQL – Finding Top Grossing Genres Within Each Rating

-- top grossing films per genre, within each rating
-- 各种题材的影片观看数最高的,

WITH base_table AS (

select p.rental_id,p.amount,r.inventory_id,f.title,f.rating,c.name as genre
from payment p
	left join rental r on p.rental_id = r.rental_id
	left join inventory i on i.inventory_id = r.inventory_id
	left join film f on f.film_id = i.film_id
	
	left join film_category fc on fc.film_id = i.film_id
	left join category c on c.category_id = fc.category_id

)

SELECT * from (
	select t.*,

	ROW_NUMBER() over (partition by t.rating order by t.sales desc)

	from(
		select bt.rating, bt.genre, SUM(bt.amount) as sales
		from base_table bt
		group by 1,2
		order by 3 desc
	 )t


)radical

where radical.row_number = 1;

07 Handling Missing Dates in SQL

-- 处理缺失的日期
-- dealing with fragmented data (do not have data for each day in the range)
-- drilling the difference again between INNER and LEFT JOINs :)
-- using the payment table from the dvd rental database

-- 生成日期序列
select gs::date from generate_series(
'2007-01-01',
'2007-12-31',
interval '1 day'	
) gs

-- 查询租赁日合计为空的记录
With base_table as (
	
	--上句为后套入
	select gs::date,SUM(p.amount) as sales from generate_series(
	'2007-01-01',
	'2007-12-31',
	interval '1 day'	
	) gs LEFT JOIN payment p on gs=p.payment_date::date
	group by 1
	order by 1
)
	--下句为后套入

select extract(week from bt.gs),SUM(bt.sales)   --按照星期合计查询
from base_table bt
group by 1	
order by 1

08 Caution – Generate Series Illustrating Left vs Inner Joins and Dangers of Not Understanding Them

With base_table as (
	
	--上句为后套入
	select gs::date,SUM(p.amount) as sales from generate_series(
	'2007-01-01',
	'2007-12-31',
	interval '1 day'	
	) gs LEFT JOIN payment p on gs=p.payment_date::date  --去掉Left,则查询sum数据不为0的记录
	group by 1
	order by 1
)
	--下句为后套入

select extract(week from bt.gs),SUM(bt.sales)   --按照星期合计查询
from base_table bt
group by 1	
order by 1

09 Filtering your database using an external data source

-- 用外部数据源过滤数据库
-- get earliest order date, correlated subquery!
-- get most recent order date, correlated subquery
-- days since most recent order, use current_date a bit

select

case 
  when t.country = 'United States' THEN 'USA'
  else 'Rest of world' END as geo_type,
  
count(*) from (
select c.customer_id,c.first_name,c.last_name,c.email,c.address_id,
	   split_part(c.email,'@',2),c.last_name ||', '|| c.first_name as last_first,ci.city,
	   ci.country_id, co.country,
	-- outer table, c.customer_id, inner table p.customer_id
(
	select min(p.payment_date)
			from payment p where p.customer_id = c.customer_id	
)::date as cust_min_order_date,
	
(
	select max(p.payment_date)
			from payment p where p.customer_id = c.customer_id
)::date as cust_max_order_date,
	
current_date - (
	select max(p.payment_date)
		from payment p where p.customer_id = c.customer_id
)::date as days_since_recent_order
	
From customer c
		left join address a on a.address_id = c.address_id
		left join city ci on ci.city_id = a.city_id
		left join country co on ci.country_id = co.country_id
	
--say you have 10 email from a tradeshow and you want to check whethre or 
--not they exist in your customer table?
		
)t

group by 1 order by 2 desc;

-------------------
select

case 
  when t.country = 'United States' THEN 'USA'
  else 'Rest of world' END as geo_type,
  
count(*) from (
select c.customer_id,c.first_name,c.last_name,c.email,c.address_id,
	   split_part(c.email,'@',2),c.last_name ||', '|| c.first_name as last_first,ci.city,
	   ci.country_id, co.country,
	-- outer table, c.customer_id, inner table p.customer_id
(
	select min(p.payment_date)
			from payment p where p.customer_id = c.customer_id	
)::date as cust_min_order_date,
	
(
	select max(p.payment_date)
			from payment p where p.customer_id = c.customer_id
)::date as cust_max_order_date,
	
current_date - (
	select max(p.payment_date)
		from payment p where p.customer_id = c.customer_id
)::date as days_since_recent_order
	
From customer c
		left join address a on a.address_id = c.address_id
		left join city ci on ci.city_id = a.city_id
		left join country co on ci.country_id = co.country_id

--Inner Join on Outside of the DB data	
where c.email IN (
	
	'sonia.gregory@sakilacustomer.org',
	'miriam.mckinney@sakilacustomer.org',
	'velma.lucas@sakilacustomer.org',
	'becky.miles@sakilacustomer.org',
	'bobbie.craig@sakilacustomer.org',
	'violet.rodriquez@sakilacustomer.org',
	'kristina.chambers@sakilacustomer.org',
	'toni.holt@sakilacustomer.org',
	'misty.lambert@sakilacustomer.org',
	'mae.fletcher@sakilacustomer.org',
	'shelly.watts@sakilacustomer.org',
	'daisy.bates@sakilacustomer.org',
	'ramona.hale@sakilacustomer.org'

)	
	
)t

group by 1 order by 2 desc

10 Using postgres regexp_replace to clean text data

-- 使用 regexp_replace 清洗文本数据
-- Regular Expressions (clearning data)

-- Remove stop words
-- Lowercase all text
-- Trim text
-- then count by first letter of first name in title

-- remove of, a, in
-- word boundary

Select f.description,

--regexp_replace(f.description,'of|a|in','xxx','g')
--regexp_replace(f.description,'of|\ma\M|in','xxx','g')
--regexp_replace(f.description,'\mof\M|\ma\M|\min\M','xxx','g')
--regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g')
--regexp_replace(f.description,'\mof\M|\ma\M|\min\M|\s\s+','','g')
lower(
		regexp_replace(
			regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),
				'\s+',' ','g')),
	
--left(lower(
-- regexp_replace(
-- 			regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),
-- 				'\s+',' ','g')),3)
trim(lower(
		regexp_replace(
			regexp_replace(f.description,'\mof\M|\ma\M|\min\M','','g'),
				'\s+',' ','g')))

from film f;

11 Postgres Generate Series and Inner vs Left Joins

--  生成序列数据,左联合vs右联合
			
select * from(
	select date::date, sum(p.amount), count(*) from generate_series(
	'2007-01-01'::date,
	'2007-12-31'::date,
	'1 day'::interval

	)date left join payment p on p.payment_date::date = date.date 
		--测试去掉left 或使用INNER 的查询结果
	group by 1
	order by 1
-- nulls last -- 查询sum为null的记录
)t

where t.count > 1


-- without left join, you reduce the result to only matching cases on either
-- side of the join in this case, that means we needed sales on the date...

12 Array Operations in Postgres – Grouping to An Array

-- 数组操作 - 数组分组
			
select f.film_id,count(*)

from film f
		left join film_actor fa on f.film_id = fa.film_id
group by 1
order by 1;

/**/
select f.film_id,fa.*

from film f
		left join film_actor fa on f.film_id = fa.film_id
order by 1,2;

/*****/
select f.film_id,count(*),
	array_agg(fa.actor_id),
	(array_agg(fa.actor_id))[1:3],	-- 数组中为前3个演员的ID
	(select sum(x) from unnest(array_agg(fa.actor_id)) x)

from film f
		left join film_actor fa on f.film_id = fa.film_id
group by 1
order by 1,2

13 Finding Customers by Unique Genres Rented

-- 查找租借单一题材的客户
-- find customers who have rented from >=10 genres
-- you can select from any select, everything is a table!!!

select t2.customer_id,count(distinct t2.genre) from (

	select t.customer_id,t.genre,count(*) from(

		select r.rental_id,r.customer_id,r.inventory_id,f.title,fc.category_id,c.name as genre
		from rental r 
			left join inventory i on r.inventory_id = i.inventory_id
			left join film f on i.film_id = f.film_id
			left join film_category fc on fc.film_id = f.film_id
			left join category c on c.category_id = fc.category_id

	)t

	group by 1,2
	order by 1,3 desc

)t2

group by 1
having count(*) > 10
order by 2;

             

14 SQL For Actors with the Most Films

-- 查询参演最多电影的演员
-- which film has the most actors?  --哪支股票有最多的基金参与?
-- everything is a table, everything is table
-- find the actor with the most films?
-- actor_id, nbr of films they are in

select t.actor_id, count(distinct t.film_id) from (

	select f.film_id,f.title,f.release_year,fa.actor_id,a.first_name,a.last_name

	from film f 
			left join film_actor fa on f.film_id = fa.film_id
			left join actor a on fa.actor_id = a.actor_id

	order by 1,4

)t

group by 1
having count(distinct t.film_id) > 34
order by 2 desc;

15 Postgres – Top Movies by Genre Window Functions and CTEs

-- 按照题材WF和CTE列出最受欢迎的电影
-- Joins Joins Joins!!! what are the thop selling movies?

-- payment --> rental(inventory) --> inventory to find more about the movie?
-- Top selling movie per genre 
		-- 各种题材电影销量最好的(各个板块中涨幅最大的/涨幅最小且持续时间最长的)

-- common table expression

with base_table as (

	select p.rental_id, p.amount,r.inventory_id,i.film_id,f.title,f.rating
	from payment p 
			left join rental r on p.rental_id = r.rental_id
			left join inventory i on i.inventory_id = r.inventory_id
			left join film f on f.film_id = i.film_id

),next_table as(

select bt.title,bt.rating, sum(bt.amount)
from base_table bt
group by 1,2
order by 3 desc

)

select * from(
	
	select nt.*,
	row_number() over (partition by nt.rating order by nt.sum desc) as rating_rank
	from next_table nt
	
)t

where t.rating_rank = 1;

16 SQL – everything is a table, some more joining basics

-- 所有的事情都是数据表,更多的一些联合基本用法
-- more grouping , counting, etc....

select t.film_id, count(*) from(

	select f.film_id, f.title, f.release_year,fa.actor_id

	from film f
			left join film_actor fa on f.film_id = fa.film_id

		 -- in film, film_id is the primary key
	order by 1,4

	--which film has the most actors?\
	--everything is a table
)t

group by 1
order by 2 desc;

/* ------------ */
select f.film_id, f.title, f.release_year,fa.actor_id,a.first_name,a.last_name

	from film f
			left join film_actor fa on f.film_id = fa.film_id
			left join actor a on fa.actor_id = a.actor_id
			
			
	order by 1,4

17 SQL – Find New Customers by Date of Acquisition

-- 按照购买日期查找新的客户
							   
select * from (

	select t.*,

		CASE
			WHEN t.dow = 0 THEN 'Sunday'
			WHEN t.dow = 1 THEN 'Monday'
			WHEN t.dow = 2 THEN 'Tuesday'
			WHEN t.dow = 3 THEN 'Wednesday'
			WHEN t.dow = 4 THEN 'Thurs'
			WHEN t.dow = 5 THEN 'Fri'
			WHEN t.dow = 6 THEN 'Saturday'
		END as day_of_week

	from(

			select extract(dow from p.payment_date) as dow,sum(p.amount)::money   
			--::money 表示货币数据, as dow  字段标识为dow,替代payment_date
			from payment p 
			group by 1
			order by 1
		)t
)t2

where t2.day_of_week = 'Fri' or t2.day_of_week = 'Thurs';

------
with first_payments as (

	select p.customer_id,min(p.payment_date)::date   
		-- ::date 一定要有,否则查询结果就不是想要的结果
	from payment p
	group by 1
	order by 2,1

)

-- new customers by date (common table expression)
select * from first_payments;

18 Abstract Intro the ROW_NUMBER Window Function Using Random Data

-- 日期属于哪个季度; 销售业绩最好的前5天,是几月几日
select date::date, extract(quarter from date) from generate_series(
'2007-01-01'::date,
'2007-12-31'::date,
'1 day'::interval)date

----
select * from (

	select t.*,
		row_number() over(partition by t.qtr order by t.sales desc) as day_rank

	from (

		select date::date, extract(quarter from date) as 
			qtr,floor(random()*10000)::numeric::money as sales
		from generate_series(
		'2007-01-01'::date,
		'2007-12-31'::date,
		'1 day'::interval)date

	)t
-- top 5 performing days (sales_per quarte -- what are the dates?)
)t2

where t2.day_rank in (1,2,3,4,5)  -- 每个季度销售业绩最好的5天

19 Simulate ROW_NUMBER() Without Window Functions

select * from(

	select p.payment_id, p.customer_id,
		-- correlated subquery:
		-- inner query below is counting the nbr of times that the inner payment id <= outer pmt ID
		-- inner always matches OUTER customer id, 以下内查询时间的编号,时间的符合条件为....
	(
		select count(*) from payment p2 
			where p2.payment_id <= p.payment_id and p2.customer_id = p.customer_id

	) as row_nbr

	from payment p
	order by 2,1

)t where t.row_nbr =1;	

20 Postgres – Use Row_Number to Find Customer First Orders

-- 找到客户的第一次下单
-- solve it uses row_number

select * from(

	select p.payment_id, p.customer_id, p.payment_date,
	Row_number() over(partition by p.customer_id order by p.payment_id asc) as cust_order_rank,
	Row_number() over(partition by p.customer_id order by p.payment_date asc) as cust_order_rank_date

	from payment p 
	order by 2,3

)t

where t.cust_order_rank_date = 1;

21 First Customer Order With Subquery But No Common Table Expression

-- 用子查询找到客户的第一个单子
							   
-- more customer value: getting first orders and other data about it
-- payment, p.payment_id goes up over time, its a primary key

select p.customer_id,p.amount, min(p.payment_id) as first_pmt_id
from payment p
group by 1,2
order by 1

/*-------------------*/
select p.customer_id, min(p.payment_id) as first_pmt_id,

-- subquery to join the outerrows to this inner row
(
	select p2.amount from payment p2
		where p2.payment_id = min(p.payment_id)
) as first_pmt_amt,

(
	select p2.rental_id from payment p2
		where p2.payment_id = min(p.payment_id)
) as first_rnt_id

from payment p
group by 1 
order by 1;

22 Using a Common Table Expression To Find Customer 1st Order Date

-- CTE (common table expression) -- 找到客户第一次下单的时间

With first_orders as (

	select p.customer_id, min(p.payment_id) as first_pmt
	from payment p
	group by 1
	
)

-- select * from first_orders
-- order by 1

select * from payment p2
where p2.payment_id in (
						 select fo.first_pmt from first_orders fo						
						)

order by 2;

23 SQL – Break out Customer Orders by New vs Repeat

-- 把客户的首次下单和再次下单分出来
-- get the customer's first order date and the amount they spent
-- 得到客户首次下单的时间和话费的金额
							   
select t.payment_date::date,
	case
		when t.row_number =1 then 'New_buyer'
		else 'repeat_buyer'
		end as order_type,
		
		sum(t.amount)
from (		
		select 
			p.customer_id, 
			p.payment_date, 
			Row_number() over (partition by p.customer_id),
			amount
		from payment p
	)t
group by 1,2
order by 1
	
-- order can be first order, or a prpeat order(if row_number =1, new else repeat)
-- date, rev for 1st time, rev for repeat

24 SQL – Do Customers Spend More On 1st or 2nd Order

-- 客户的第二次下单花费多或少 
-- do they spend more or less on their second order?

/*----------------------------*/
with base_table as(

	select * from (

		select p.payment_id, p.customer_id, p.payment_date, p.amount,
				row_number() over(partition by p.customer_id order by p.payment_date asc)
							as cust_order_rank,
				LAG(p.payment_date) over (partition by p.customer_id 
							order by p.payment_date asc) as prior_order,
				p.payment_date -lag(p.payment_date) over (partition by p.customer_id 
							order by p.payment_date asc) as time_since,
				floor(extract(epoch from p.payment_date - lag(p.payment_date)
							 over(partition by p.customer_id order by p.payment_date asc))/60)
							as mins_since_prior_order
		from payment p
		order by 2,3

	)t where t.cust_order_rank IN (1,2)
)
-- which order nbr of 1 or 2, has more money

select bt.cust_order_rank, sum(bt.amount),count(*)
from base_table bt
group by 1

/*------------------------------------------------------------*/
-- every customer has at least 2 orders , order rank 2 < order rank 1
-- 至少有两个订单的客户,订单排名2 < 订单排名1
		
with base_table as(

	select * from (

		select p.payment_id, p.customer_id, p.payment_date, p.amount,
				row_number() over(partition by p.customer_id order by p.payment_date asc)
						as cust_order_rank,
				LAG(p.payment_date) over (partition by p.customer_id 
						order by p.payment_date asc) as prior_order,
				p.payment_date -lag(p.payment_date) over (partition by p.customer_id 
						order by p.payment_date asc) as time_since,
				floor(extract(epoch from p.payment_date - lag(p.payment_date)
							 over(partition by p.customer_id order by p.payment_date asc))/60)
								as mins_since_prior_order
		from payment p
		order by 2,3

	)t where t.cust_order_rank IN (1,2,3,4,5,6,7,8,9)
)
-- which order nbr of 1 or 2, has more money

select bt.cust_order_rank, sum(bt.amount),count(*),
sum(bt.amount)/count(*)
from base_table bt
group by 1

25 The SUM(CASE WHEN pattern in SQL to compute valued columns

-- get the customer's first order date and the amount they spent
-- 获得客户的第一个订单和他们的消费总额		

select t.payment_date::date,

case 
	when t.row_number = 1 then 'New_buyer'
	else 'repeat_buyer'
	end as order_type,
	
	sum(t.amount)
	
from (

	select 
		p.customer_id,
		p.payment_date,
		row_number() over (partition by p.customer_id),
		amount
	from payment p
	
)t

group by 1,2
order by 1

/*----------------------------------------*/

-- max rising change of price

select t.payment_date::date,

sum (case when t.row_number = 1 then t.amount else 0 end)::money as new_buyer_revenues,
sum (case when t.row_number <> 1 then t.amount else 0 end)::money as repeat_buyer_revenues,
sum (case when t.row_number > 0 then t.amount else 0 end) as all_revenues,

sum (case when t.row_number = 1 then t.amount else 0 end)/ 
			sum (case when t.row_number > 0 then t.amount else 0 end) as pct_from_new

from (

	select 
		p.customer_id,
		p.payment_date,
		row_number() over (partition by p.customer_id),
		amount
	from payment p
	
)t

group by 1 
order by 1

26 A Gentle Intro to the LAG() Window Function in SQL

-- LAG 窗口函数简介
-- feel good lag today is novermber 25,2016
-- select current_date, current_date - interval '364' days as ly
-- day,measure, measure for same time ly

with sales_data as(

	select date::date, floor(random()*100000)::numeric::money as sales_this_yr
	from generate_series(
	'2015-01-01'::date,
	'2016-12-31'::date,
	'1 day'::interval	
	)date

)
-- Nov 25, 2016 black friday
-- the comp date for LY was 11/27/15

select sd.*,LAG(sd.sales_this_yr,364) over(), current_date::date - interval '364 days'

from sales_data sd;

27 Use LAG() To find timing between the 1st and 2nd Customer Order

-- 使用lag函数,查到客户的第一单和第二单之间的间隔时间
--what's the avg delay between the first and second order???
-- 第一个订单和第二个订单之间平均相隔几天?
-- 借鉴此类方法,计算Zen的各种几何片段????

with base_table As(

	select * from (

		select p.payment_id,p.customer_id,p.payment_date,
				row_number() over(partition by p.customer_id order by p.payment_date ASC) 
							as cust_order_rank,
				lag(p.payment_date) over(partition by p.customer_id order by p.payment_date ASC) 
							as prior_order,		
				p.payment_date - lag(p.payment_date) over(partition by p.customer_id order by
							p.payment_date asc) as time_series,
				floor(extract(epoch from p.payment_date - lag(p.payment_date) 
							  over(partition by p.customer_id order by p.payment_date asc))/60) 
							as mins_since_prior_order
		from payment p
		order by 2,3

	)t where t.cust_order_rank = 2 -- IN (1,2)  
		-- 第二单,或第一单和第二单

)

select 

	case
		when bt.mins_since_prior_order < 100 then '< 100'
		when bt.mins_since_prior_order < 200 then '< 200'
		when bt.mins_since_prior_order < 300 then '< 300'
		when bt.mins_since_prior_order < 400 then '< 400'
		when bt.mins_since_prior_order < 500 then '< 500'
		else ' >= 500' END as time_distribution,
		
		count(*)

from base_table bt
group by 1
order by 2 desc

28 Use Correlated Subqueries to Find First 7 and First 30 Days Revenues

-- get the first order revenue and the forward 7 days sales
-- 前7天和前30天的销售额

With base_table as (
		select p.payment_id, p.customer_id, p.payment_date, p.amount,
		row_number() over(partition by p.customer_id order by p.payment_id asc) 
						as cust_order_rank_payment
		from payment p
		order by 2,3
	
)

select bt.*,

(
	select sum(p2.amount) from payment p2
	where p2.payment_date between bt.payment_date and bt.payment_date + INTERVAL '7 days'
						  and bt.customer_id = p2.customer_id	
) fwd_7_days_sales,

	-- correlated subquery
(
	select sum(p2.amount) from payment p2
	where p2.payment_date between bt.payment_date and bt.payment_date + INTERVAL '30 days'
						  and bt.customer_id = p2.customer_id	
) fwd_30_days_sales

from base_table bt where bt.cust_order_rank_payment = 1;

29 PostgresSQL Row Number (Hourly sales for each staff member)

-- without left join, you reduce the results to only matching cases on either side of the join
-- in this case, that means needed sales on the date...

-- which is the most popular hour for each staff member?
-- 对员工会员而言,业绩最好的时间是哪个小时?

with hourly_sales as (

	select p.staff_id, extract(hour from p.payment_date) as hour_of_day,sum(p.amount)
	from payment p
	group by 1,2
	order by 3 desc, 1 desc

)
-- what hour is the best for each staff_id?

select * from (

	select hs.*,
		row_number() over (partition by hs.staff_id order by hs.sum desc)

	from hourly_sales hs

)t

where t.row_number = 1

30 Compute Moving Averages Using Postgres

-- select sd.*, lag(sd.sales_this_yr,364) over(), sd.date - interval '364 days'

-- from sales_date sd

-- 7 day trailing MA

with sales_data as (

	select date::date, floor(random()*10000)::numeric::money as sales_this_yr
	from generate_series(
	'2015-01-01'::date,
	'2016-12-31'::date,
	'1 day'::interval	
	)date
)	
select sd.*,
	floor(avg(sd.sales_this_yr::numeric) over (order by sd.date rows between 7 preceding and 0 following)) 
					as l7davg,
	sd.sales_this_yr::numeric - floor(avg(sd.sales_this_yr::numeric) over(order by sd.date rows 
					between 7 preceding and 0 following)) as vsavg
		
from sales_data sd;

-- time diffs between order by customers

31 SQL – Multiple Row_Number functions addressing different partitions

-- what about the top day per quarter and the top day per month?


select t.*,
	row_number() over(partition by t.qtr order by t.sales desc) as day_rank_qtr,
	row_number() over(partition by t.month order by t.sales desc) as day_rank_mo

from (
	
	select date::date, extract(quarter from date) as qtr, extract(month from date) as month,
	floor(random()*100000)::numeric::money as sales
	from generate_series(
	'2007-01-01'::date,
	'2007-12-31'::date,
	'1 day'::interval	
	)date
	
)t

32 Postgres split_part, plus CASE stmt tutorial

-- 美国员工和美国之外的员工
																		  
-- Live coding this (start with customer table)
   -- splitting on email domain, combined like domains into groups
   -- concatenating text, get first, last, etc.,,,
   -- Grouping based on country, more joins and then a CASE statement
   -- get earliest order date, correlated subquery!
   -- get most recent order date, correlated subquery
   -- days since most recent order, use current_date a bit

select 
	
	case 
		when t.country = 'United States' then 'USA' 
		else 'Rest of World' end as geo_type,

		count(*) from (

			select c.customer_id, c.first_name, c.last_name, c.email, c.address_id,
			split_part(c.email,'@',2),c.last_name||', '||c.first_name as last_first,ci.city,
			ci.country_id,co.country,

			-- outer table, c.customer_id, inner table p.customer_id

			(
				select min(p.payment_date)
						from payment p where p.customer_id = c.customer_id
			)::date as cust_min_order_date,

			(
				select max(p.payment_date)
						from payment p where p.customer_id = c.customer_id
			)::date as cust_max_order_date,

			current_date - (
				select max(p.payment_date)
						from payment p where p.customer_id = c.customer_id
			)::date as days_since_recent_order

			from customer c
					left join  address a on a.address_id = c.address_id
					left join city ci on ci.city_id = a.city_id
					left join country co on ci.country_id = co.country_id

		)t

		group by 1 order by 2 desc;

33 SQL Row Number _ Window Function Example – Top NY Baby Names by Year

发布者:股市刺客,转载请注明出处:https://www.95sca.cn/archives/76184
站内所有文章皆来自网络转载或读者投稿,请勿用于商业用途。如有侵权、不妥之处,请联系站长并出示版权证明以便删除。敬请谅解!

(0)
股市刺客的头像股市刺客
上一篇 2024 年 7 月 11 日
下一篇 2024 年 7 月 11 日

相关推荐

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注