这样直接一个查询的话,最后结果小数部分和参考答案跑出来的有些差异,我前几题都这么写,前一题试了下小数保留4位,两种写法结果是一样的
SELECT
CAST(SUM(CASE WHEN pay_amount = 144 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS yearly,
CAST(SUM(CASE WHEN pay_amount = 90 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS half_yearly,
CAST(SUM(CASE WHEN pay_amount = 51 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS quarterly,
CAST(SUM(CASE WHEN pay_amount = 19 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS monthly,
CAST(SUM(pay_amount / (DATEDIFF(end_date,begin_date)+1)) AS DECIMAL(10,2)) AS total_income
FROM bilibili_m2 AS m2
LEFT JOIN bilibili_m1 AS m1
ON m2.m_date BETWEEN m1.begin_date AND m1.end_date
WHERE m2.m_date RLIKE '^2021'
WITH usr_act AS (
SELECT usr_id,live_id,enter_time AS event_time ,1 AS act
FROM ks_live_t1
UNION ALL
SELECT usr_id,live_id,leave_time AS event_time, -1 AS act
FROM ks_live_t1
/* FROM 子句不要忘写 */
),
live_online_users AS (
SELECT live_id,event_time,
SUM(act) OVER(PARTITION BY live_id
ORDER BY event_time) AS online_users
FROM usr_act
),
live_online_users_rnk AS ( /* 也可以利用窗口函数 MAX() OVER() */
SELECT live_id,event_time,online_users,
RANK() OVER(PARTITION BY live_id
ORDER BY online_users DESC) AS rnk
FROM live_online_users
),
live_online_user_peak AS (
SELECT live_id, event_time,online_users
FROM live_online_users_rnk
WHERE rnk = 1
),
/* 本来想用FIRST_VALUE() OVER() 和 LAST_VALUE() OVER()的,不过分区内多行的话,好像不能像GROUP BY 那样轻易变成1行 */
live_online_user_peak_time AS (
SELECT live_id,online_users AS max_online_users,
MIN(event_time) AS first_peak_time,
MAX(event_time) AS last_peak_time
FROM live_online_user_peak
GROUP BY live_id,online_users
)
SELECT t1.live_id,t2.live_nm,t1.max_online_users,
t1.first_peak_time,
t1.last_peak_time
FROM live_online_user_peak_time AS t1
LEFT JOIN ks_live_t2 AS t2
ON t1.live_id = t2.live_id
ORDER BY max_online_users DESC
WITH usr_typ AS (
SELECT t20.usr_id,t20.v_id,t20.v_tm,t3.v_typ
FROM bilibili_t20 AS t20
LEFT JOIN bilibili_t3 AS t3
ON t20.v_id = t3.v_id
WHERE DATE(t20.v_tm) >= '2021-02-05' /* 或者用 t20.v_tm RLIKE '^2021-02-0[5-8]' */
)
SELECT ta.v_typ,
COUNT(DISTINCT ta.usr_id) AS total_views,
COUNT(DISTINCT tb.usr_id) AS retained_users,
CAST(COUNT(DISTINCT tb.usr_id) / COUNT(DISTINCT ta.usr_id)*100 AS decimal(5,2)) AS retention_rate
FROM usr_typ AS ta
LEFT JOIN usr_typ AS tb
ON ta.usr_id = tb.usr_id AND DATEDIFF(tb.v_tm,ta.v_tm) BETWEEN 1 AND 3
WHERE ta.v_tm RLIKE '^2021-02-05'
GROUP BY ta.v_typ
ORDER BY retention_rate DESC
WITH new_user AS (
SELECT usr_id,MIN(v_date) AS first_login
FROM bilibili_t100
GROUP BY usr_id
)
SELECT nu.first_login AS login_date,
COUNT(DISTINCT nu.usr_id) AS new_users,
COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) AS new_members,
CAST(COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) / COUNT(DISTINCT nu.usr_id)*100 AS decimal(5,2)) AS conversion_rate
FROM new_user AS nu
LEFT JOIN bilibili_t100 AS t
ON nu.usr_id = t.usr_id AND nu.first_login = t.v_date
GROUP BY login_date
ORDER BY login_date
WITH new_user AS (
SELECT usr_id,MIN(v_date) AS first_login
FROM bilibili_t100
GROUP BY usr_id
)
SELECT nu.first_login AS login_date,
COUNT(DISTINCT nu.usr_id) AS new_users,
COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) AS new_members,
CAST(COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) / COUNT(DISTINCT nu.usr_id)*100 AS decimal(5,2)) AS conversion_rate
FROM new_user AS nu
LEFT JOIN bilibili_t100 AS t
ON nu.usr_id = t.usr_id AND nu.first_login = t.v_date
GROUP BY login_date
ORDER BY login_date
SELECT cust_uid
FROM mt_trx_rcd1
GROUP BY cust_uid
HAVING GROUP_CONCAT(DISTINCT mch_nm) RLIKE '(庄家界\(千灯店\).+?黄记烘培宫廷桃酥王)'
可以先观察下GROUP_CONCAT()的结果里两家店的前后顺序,再决定正则的写法
忘了第一次的时候是怎么写的。。。反正现在跑出来的和参考答案不一样
SELECT st.student_id, st.name, sc.score,
ROW_NUMBER() OVER(ORDER BY sc.score DESC) AS rnk
FROM students AS st
LEFT JOIN scores AS sc
ON st.student_id = sc.student_id
WHERE subject = '物理' AND grade_code = 'S1'
LIMIT 10
发现ROW_NUMBER() OVER() 里PARTITION BY grade_code 的有无,好像会影响成绩相同时的排序
本来第一遍写的跑出来显示正确,但多想了下发现好像有点问题,
单就T+3而言
我跑出来显示正确的写法,两表自联结条件之一是表b登录日期与表a登录日期之差DATEDIFF(tb.login_date, ta.login_date) BETWEEN 2 AND 3
参考答案写法是DISTINCT CASE WHEN days_diff BETWEEN 2 AND 3 THEN usr_id END,
但细想了下,觉得自联结条件应该是DATEDIFF(tb.login_date, ta.login_date) BETWEEN 1 AND 3
相应地,参考答案写法似乎应该是DISTINCT CASE WHEN days_diff BETWEEN 1 AND 3 THEN usr_id END
我也忘了最开始为什么会写BETWEEN 2 AND 3…………
WITH user_login_date AS (
SELECT DISTINCT usr_id, DATE(login_time) AS login_date
FROM user_login_log
WHERE DATEDIFF(CURDATE(), DATE(login_time)) <= 90
)
SELECT ta.login_date,
CAST(COUNT(DISTINCT tb.usr_id) / COUNT(DISTINCT ta.usr_id)*100 AS decimal(10,2)) AS t_plus_3_retention_rate,
CAST(COUNT(DISTINCT tc.usr_id) / COUNT(DISTINCT ta.usr_id)*100 AS decimal(10,2)) AS t_plus_7_retention_rate,
CAST(COUNT(DISTINCT td.usr_id) / COUNT(DISTINCT ta.usr_id)*100 AS decimal(10,2)) AS t_plus_14_retention_rate
FROM user_login_date AS ta
LEFT JOIN user_login_date AS tb
ON ta.usr_id = tb.usr_id AND DATEDIFF(tb.login_date,ta.login_date) BETWEEN 2 AND 3
LEFT JOIN user_login_date AS tc
ON ta.usr_id = tc.usr_id AND DATEDIFF(tc.login_date,ta.login_date) BETWEEN 2 AND 7
LEFT JOIN user_login_date AS td
ON ta.usr_id = td.usr_id AND DATEDIFF(td.login_date,ta.login_date) BETWEEN 2 AND 14
GROUP BY ta.login_date
ORDER BY ta.login_date DESC
WITH user_login_date AS (
SELECT DISTINCT usr_id, DATE(login_time) AS login_date
FROM user_login_log
WHERE /* DATE(login_time) >= DATE_SUB(CURDATE(), INTERVAL 30 DAY) */
DATEDIFF(CURDATE(), DATE(login_time)) <= 30
/* DATEDIFF() 能比 DATE_SUB()、DATE_ADD() 少写几个字符…… */
)
SELECT
ta.login_date AS first_login_date,
CAST(COUNT(tb.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_1_retention_rate,
CAST(COUNT(tc.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_3_retention_rate,
CAST(COUNT(td.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_7_retention_rate,
CAST(COUNT(te.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_14_retention_rate
FROM user_login_date AS ta
LEFT JOIN user_login_date AS tb
ON ta.usr_id = tb.usr_id AND DATEDIFF(tb.login_date, ta.login_date) = 1
LEFT JOIN user_login_date AS tc
ON ta.usr_id = tc.usr_id AND DATEDIFF(tc.login_date, ta.login_date) = 3
LEFT JOIN user_login_date AS td
ON ta.usr_id = td.usr_id AND DATEDIFF(td.login_date, ta.login_date) = 7
LEFT JOIN user_login_date AS te
ON ta.usr_id = te.usr_id AND DATEDIFF(te.login_date, ta.login_date) = 14
GROUP BY ta.login_date
ORDER BY ta.login_date
9分这位并列第二的店有三家,也就是说前三要在这三家里取二,
我这里跑出来是取了秀水餐厅、黄记烘培宫廷桃酥王,和参考答案出来的不一样,只有一个排序条件下相同值究竟怎么排的。。。
WITH rfm_inf AS (
SELECT cust_uid AS user_id,
DATEDIFF(CURDATE(), MAX(trx_dt)) AS re,
COUNT(DISTINCT trx_dt) AS fre,
AVG(trx_amt) AS mo
FROM mt_trx_rcd_f
GROUP BY user_id
ORDER BY user_id
),
rfm_score AS (
SELECT user_id,
NTILE(3) OVER(ORDER BY re DESC) AS recency_score,
NTILE(3) OVER(ORDER BY fre) AS frequency_score,
NTILE(3) OVER(ORDER BY mo DESC) AS monetary_score
FROM rfm_inf
ORDER BY user_id
),
mch_nm_rnk AS (
SELECT rs.*,mf.mch_nm,
COUNT(*) AS cnt,
ROW_NUMBER() OVER(PARTITION BY rs.user_id
ORDER BY COUNT(*) DESC) AS rnk
FROM rfm_score AS rs
LEFT JOIN mt_trx_rcd_f AS mf
ON rs.user_id = mf.cust_uid
WHERE recency_score + frequency_score + monetary_score = 9
GROUP BY rs.user_id, mf.mch_nm,rs.recency_score,rs.frequency_score,rs.monetary_score
ORDER BY rs.user_id,rnk
)
SELECT user_id, recency_score, frequency_score,monetary_score,
CONCAT(MAX(CASE WHEN rnk = 1 THEN mch_nm ELSE NULL END),', ',
MAX(CASE WHEN rnk = 2 THEN mch_nm ELSE NULL END),', ',
MAX(CASE WHEN rnk = 3 THEN mch_nm ELSE NULL END)) AS Top3_mch_nm
FROM mch_nm_rnk
GROUP BY user_id
重新试了下,单独求frequency,10017分到1、10021分到2
SELECT cust_uid AS user_id,
COUNT(DISTINCT trx_dt) AS fre,
NTILE(3) OVER(ORDER BY COUNT(DISTINCT trx_dt)) AS frequency_score
FROM mt_trx_rcd_f
GROUP BY user_id
ORDER BY user_id
不过我是三值一起求的,就反过来了
SELECT cust_uid AS user_id,
/* DATEDIFF(CURDATE(), MAX(trx_dt)) AS re,*/
NTILE(3) OVER(ORDER BY DATEDIFF(CURDATE(), MAX(trx_dt)) DESC) AS recency_score,
/* COUNT(DISTINCT trx_dt) AS fre,*/
NTILE(3) OVER(ORDER BY COUNT(DISTINCT trx_dt)) AS frequency_score,
/* AVG(trx_amt) AS mo,*/
NTILE(3) OVER(ORDER BY AVG(trx_amt)) AS monetary_score
FROM mt_trx_rcd_f
GROUP BY user_id
ORDER BY user_id
WITH 用太多会不会不太好,性能会不会有明显差异
WITH fvck_typ_cnt AS (
SELECT DATE_FORMAT(trx_time,'%Y-%m-%d') AS date_value,
COUNT(*) AS FvckCnt,
SUM(CASE WHEN trx_amt = 288 THEN 1 ELSE 0 END) AS WithHand,
SUM(CASE WHEN trx_amt = 388 THEN 1 ELSE 0 END) AS WithBalls,
SUM(CASE WHEN trx_amt = 588 THEN 1 ELSE 0 END) AS BlowJobbie,
SUM(CASE WHEN trx_amt = 888 THEN 1 ELSE 0 END) AS Doi,
SUM(CASE WHEN trx_amt = 1288 THEN 1 ELSE 0 END) AS DoubleFly
FROM cmb_usr_trx_rcd
WHERE usr_id = 5201314520
AND trx_time RLIKE '^2024-09'
AND mch_nm RLIKE '(按摩|保健|休闲|会所)'
GROUP BY date_value
ORDER BY date_value
),
date2409 AS (
SELECT date_value
FROM date_table
WHERE date_value RLIKE '2024-09'
),
ohya AS (
SELECT *,
LEAD(trx_amt,1) OVER(PARTITION BY DATE_FORMAT(trx_time,'%Y-%m-%d')
ORDER BY trx_time) AS second
FROM cmb_usr_trx_rcd
WHERE usr_id = 5201314520
AND trx_time RLIKE '2024-09'
AND mch_nm RLIKE '(按摩|保健|休闲|会所)'
),
ohya_cnt AS (
SELECT DATE_FORMAT(trx_time,'%Y-%m-%d') AS date_value,
COUNT(*) AS Ohya
FROM ohya
WHERE trx_amt = 888 AND second = 1288
GROUP BY date_value
)
SELECT da.date_value,
COALESCE(fv.FvckCnt,0) AS FvckCnt,
COALESCE(fv.WithHand,0) AS WithHand,
COALESCE(fv.WithBalls,0) AS WithBalls,
COALESCE(fv.BlowJobbie,0) AS BlowJobbie,
COALESCE(fv.Doi,0) AS Doi,
COALESCE(fv.DoubleFly,0) AS DoubleFly,
COALESCE(oh.Ohya,0) AS Ohya
FROM date2409 AS da
LEFT JOIN fvck_typ_cnt AS fv
ON da.date_value = fv.date_value
LEFT JOIN ohya_cnt AS oh
ON da.date_value = oh.date_value
先求出每季度各项目的次数(辅助列),再用SUM() OVER(),再把第一步注释掉,就是trx_quarter的代码在窗口函数里要重复写两遍,看起来有点繁琐
SELECT CONCAT(YEAR(trx_time),'-Q',QUARTER(trx_time)) AS trx_quarter,
/* SUM(CASE WHEN trx_amt = 288 THEN 1 ELSE 0 END) AS withhand1,*/
SUM(SUM(CASE WHEN trx_amt = 288 THEN 1 ELSE 0 END)) OVER(ORDER BY CONCAT(YEAR(trx_time),'-Q',QUARTER(trx_time))) AS withhand,
/* SUM(CASE WHEN trx_amt = 888 THEN 1 ELSE 0 END) AS doi1,*/
SUM(SUM(CASE WHEN trx_amt = 888 THEN 1 ELSE 0 END)) OVER(ORDER BY CONCAT(YEAR(trx_time),'-Q',QUARTER(trx_time))) AS doi
FROM cmb_usr_trx_rcd
WHERE usr_id = 5201314520
AND mch_nm = '红玫瑰按摩保健休闲'
AND trx_time RLIKE '^202[34]'
GROUP BY trx_quarter
ORDER BY trx_quarter
参照前面哥德堡老哥的思路写一个(我自己觉得)相对好理解的
SELECT
mch_nm
FROM
cmb_usr_trx_rcd
WHERE
trx_time RLIKE '2024'
GROUP BY
mch_nm
HAVING
COUNT(
DISTINCT CASE WHEN usr_id IN (5201314520, 5211314521) THEN usr_id ELSE NULL END
) = 2
ORDER BY
mch_nm DESC
我这么写跑出来的结果和参看答案出来的应该是一样的(用文本比较软件比对过),不过最后还是提示有误,是哪里出问题了呢
WITH class_score_cnt AS (
SELECT st.class_code,
COUNT(*) AS total_students,
SUM(CASE WHEN sc.score >= 110 THEN 1 ELSE 0 END) AS excellent_cnt,
SUM(CASE WHEN sc.score >= 90 AND sc.score < 110 THEN 1 ELSE 0 END) AS good_cnt,
SUM(CASE WHEN sc.score >= 60 AND sc.score < 90 THEN 1 ELSE 0 END) AS pass_cnt,
SUM(CASE WHEN sc.score < 60 THEN 1 ELSE 0 END) AS fail_cnt
FROM scores AS sc
LEFT JOIN students AS st
ON sc.student_id = st.student_id
WHERE sc.exam_date = '2024-06-30' AND sc.subject = '数学'
GROUP BY st.class_code
)
SELECT class_code, total_students,
CONCAT(excellent_cnt,', ',CAST(excellent_cnt/total_students*100 AS decimal(4,2)),'%') AS excellent,
CONCAT(good_cnt,', ',CAST(good_cnt/total_students*100 AS decimal(4,2)),'%') AS good,
CONCAT(pass_cnt,', ',CAST(pass_cnt/total_students*100 AS decimal(4,2)),'%') AS pass,
CONCAT(fail_cnt,', ',CAST(fail_cnt/total_students*100 AS decimal(4,2)),'%') AS fail
FROM class_score_cnt
ORDER BY class_code
WITH fav_gd AS (
SELECT
DISTINCT mch_id
FROM
xhs_fav_rcd
),
pchs_gd AS (
SELECT
DISTINCT mch_id
FROM
xhs_pchs_rcd
)
SELECT
gd.*,
CASE
WHEN fav.mch_id is NOT NULL
AND pchs.mch_id IS NOT NULL THEN 'Collected and Purchased'
WHEN fav.mch_id is NOT NULL
AND pchs.mch_id IS NULL THEN 'Only Collected Not Purchased'
WHEN fav.mch_id is NULL
AND pchs.mch_id IS NOT NULL THEN 'Only Purchased Not Collected'
ELSE 'Neither Collected NOR Purchased'
END AS category
FROM
gd_inf AS gd
LEFT JOIN fav_gd AS fav ON fav.mch_id = gd.gd_id
LEFT JOIN pchs_gd AS pchs ON pchs.mch_id = gd.gd_id
ORDER BY
gd.gd_id
SELECT
CAST(SUM(CASE WHEN pay_amount = 144 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS yearly,
cast(SUM(CASE WHEN pay_amount = 90 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS half_yearly,
cast(SUM(CASE WHEN pay_amount = 51 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS quarterly,
cast(SUM(CASE WHEN pay_amount = 19 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS monthly
FROM bilibili_m2 AS m2
LEFT JOIN bilibili_m1 AS m1
On m2.m_date BETWEEN m1.begin_date AND m1.end_date
WHERE m2.m_date RLIKE '^2021'
SELECT m2.y_m,
CAST(SUM(pay_amount / (DATEDIFF(end_date,begin_date)+1)) AS decimal(10,2)) AS total_monthly_income
FROM bilibili_m2 AS m2
LEFT JOIn bilibili_m1 AS m1
ON m2.m_date BETWEEN m1.begin_date AND m1.end_date
WHERE m2.m_date RLIKE '^2021'
GROUP By m2.y_m
SELECT FLOOR((DAYOFYEAR(m2.m_date)-1)/7)+1 AS week_number,
CAST(SUM(pay_amount/(DATEDIFF(end_date,begin_date)+1)) AS DECIMAL(10,2)) AS total_weekly_income
FROM bilibili_m2 AS m2
LEFT JOIN bilibili_m1 AS m1
ON m2.m_date BETWEEN m1.begin_date AND m1.end_date
WHERE m2.m_date RLIKE '^2021'
GROUP BY week_number
WITH user_daily_income AS (
SELECT user_id,begin_date,end_date,
pay_amount/(DATEDIFF(end_date,begin_date) + 1) AS daily_income
FROM bilibili_m1
)
SELECT CAST(SUM(daily_income) AS decimal(10,2))
FROM bilibili_m2 AS m2
LEFT JOIN user_daily_income AS ud
ON m2.m_date BETWEEN ud.begin_date AND ud.end_date
WHERE m2.m_date BETWEEN '2022-01-31' AND '2022-02-06'
SELECT CAST(SUM(pay_amount/(DATEDIFF(end_date,begin_date)+1)) AS decimal(10,2)) AS total_daily_income
FROM bilibili_m1
WHERE '2020-11-20' BETWEEN begin_date AND end_date
WITH mch_nm_10000 AS (
SELECT cust_uid,mch_nm
FROM mt_trx_rcd1
WHERE cust_uid = 'MT10000'
GROUP BY cust_uid,mch_nm
),
cust_mch AS (
SELECT cust_uid,mch_nm
FROM mt_trx_rcd1
GROUP BY cust_uid,mch_nm
)
SELECT mn.cust_uid,cm.cust_uid
FROM mch_nm_10000 AS mn
JOIN cust_mch AS cm
ON mn.cust_uid <> cm.cust_uid AND mn.mch_nm = cm.mch_nm
GROUP BY mn.cust_uid,cm.cust_uid
HAVING COUNT(*) = 14
WITH cust_the AS (
SELECT cust_uid
From mt_trx_rcd1
GROUP BY cust_uid
HAVING COUNT(DISTINCT CASE WHEN mch_nm IN (
SELECT DISTINCT mch_nm
FROM mt_trx_rcd1
WHERE cust_uid = 'MT10000'
) THEN mch_nm ELSE NULL END) = 14
),
mch_nm_10000 AS (
SELECT cust_uid
FROM mt_trx_rcd1
WHERE cust_uid = 'MT10000'
GROUP BY cust_uid
)
SELECT mn.cust_uid,ct.cust_uid AS cust_uid_1
FROM mch_nm_10000 AS mn
LEFT JOIN cust_the AS ct
ON mn.cust_uid <> ct.cust_uid
ORDER BY cust_uid_1
WITH cust_the AS (
SELECT cust_uid
From mt_trx_rcd1
GROUP BY cust_uid
HAVING COUNT(DISTINCT CASE WHEN mch_nm IN ('庄家界(千灯店)','黄记烘培宫廷桃酥王','品众素心素食餐厅','一枚帅哥做的菜') THEN mch_nm ELSE NULL END) = 4
),
mch_nm_10000 AS (
SELECT cust_uid
FROM mt_trx_rcd1
WHERE cust_uid = 'MT10000'
GROUP BY cust_uid
)
SELECT mn.cust_uid,ct.cust_uid AS cust_uid_1
FROM mch_nm_10000 AS mn
LEFT JOIN cust_the AS ct
ON mn.cust_uid <> ct.cust_uid
ORDER BY cust_uid_1
WITH cust_the AS (
SELECT cust_uid
From mt_trx_rcd1
GROUP BY cust_uid
HAVING COUNT(DISTINCT CASE WHEN mch_nm IN ('庄家界(千灯店)','黄记烘培宫廷桃酥王','品众素心素食餐厅') THEN mch_nm ELSE NULL END) = 3
),
mch_nm_10000 AS (
SELECT cust_uid
FROM mt_trx_rcd1
WHERE cust_uid = 'MT10000'
GROUP BY cust_uid
)
SELECT mn.cust_uid,ct.cust_uid AS cust_uid_1
FROM mch_nm_10000 AS mn
LEFT JOIN cust_the AS ct
ON mn.cust_uid <> ct.cust_uid
ORDER BY cust_uid_1
WITH cust_two AS (
SELECT cust_uid
FROM mt_trx_rcd1
GROUP BY cust_uid
HAVING COUNT(DISTINCT CASE WHEN mch_nm IN ('庄家界(千灯店)','黄记烘培宫廷桃酥王') THEN mch_nm ELSE NULL END) = 2
),
mch_nm_10000 AS (
SELECT cust_uid
FROM mt_trx_rcd1
WHERE cust_uid = 'MT10000'
GROUP BY cust_uid,mch_nm
)
SELECT mn.cust_uid,ct.cust_uid AS cust_uid_1
FROM mch_nm_10000 ASmn
JOIN cust_two AS ct
ON mn.cust_uid <> ct.cust_uid
GROUP BY mn.cust_uid,ct.cust_uid
ORDER BY cust_uid_1
WITH cust_mch AS (
SELECT cust_uid, mch_nm
FROM mt_trx_rcd1
WHERE mch_nm = '兰州李晓明拉面馆'
GROUP BY cust_uid, mch_nm
),
mt10000_mch AS (
SELECT cust_uid
FROM mt_trx_rcd1
WHERE cust_uid = 'MT10000'
GROUP BY cust_uid
)
SELECT mm.cust_uid,cm.cust_uid AS cust_uid_1,cm.mch_nm
FROM mt10000_mch AS mm
LEFT JOIN cust_mch As cm
ON mm.cust_uid <> cm.cust_uid
ORDER BY cm.cust_uid
WITH user_friend AS (
SELECT fr.user1 AS user_id,fr.user2 AS friend_id,us.steps
FROM friend_relationships AS fr
LEFT JOIN user_steps AS us
ON fr.user2 = us.user_id
UNION
SELECT fr.user2,fr.user1,us.steps
FROM friend_relationships AS fr
LEFT JOIN user_steps AS us
ON fr.user1 = us.user_id
UNION
SELECT user_id,user_id,steps
FROM user_steps
),
step_rnk AS (
SELECT user_id,friend_id,steps,
RANK() OVER(PARTITION BY user_id
ORDER BY steps DESC) AS row_num
FROM user_friend
)
SELECT user_id,row_num
FROM step_rnk
WHERE user_id = friend_id
ORDER By user_id
WITH user_friend AS (
SELECT user1 AS user_id, user2 AS friend_id
FROM friend_relationships
UNION
SELECT user2,user1
FROM friend_relationships
UNION
SELECT user1,user1
FROM friend_relationships
),
step_rnk AS (
SELECT uf.user_id,uf.friend_id,us.steps,
RANK() OVER(PARTITION BY user_id
ORDER BY steps DESC) AS row_num
FROM user_friend AS uf
LEFT JOIN user_steps AS us
ON us.user_id = uf.friend_id
)
SELECT *
FROM step_rnk
WITH fri_rela AS (
SELECT fr.user1,fr.user2,us.steps
FROM user_steps AS us
LEFT JOIN friend_relationships AS fr
ON fr.user2 = us.user_id
UNION
SELECT user_id,user_id,steps
FROM user_steps
ORDER BY user1,user2
)
,fri_rela_rnk AS (
SELECT user1,user2,
RANK() OVER(PARTITION BY user1
ORDER BY steps DESC) AS row_num
FROM fri_rela
)
SELECT user1 As user_id,row_num
FROM fri_rela_rnk
WHERE user1 = user2
ORDER By user_id
WITH fri_rel AS (
SELECT user1,user2
FROM friend_relationships
UNION
SELECT user1,user1
FROM friend_relationships
)
,
step_rnk AS (
SELECT fr.*,us.steps,
RANK() OVER(PARTITION BY fr.user1
ORDER BY us.steps DESC) AS row_num
FROM fri_rel AS fr
JOIN user_steps AS us
ON fr.user2 = us.user_id
ORDER BY fr.user1,fr.user2
)
SELECT user1 AS user_id,row_num
FROM step_rnk
WHERE user1 = user2
ORDER BY user_id
SELECT si.singer_id,si.singer_name,ai.album_id,ai.album_name,
SUM(CASE WHEN lr.start_time IS NOT NULL THEN 1 ELSE 0 END) AS play_count
FROM singer_info AS si
LEFT JOIN album_info AS ai
ON si.singer_id = ai.singer_id
LEFT JOIN song_info AS so
ON so.album_id = ai.album_id
LEFT JOIN listen_rcd AS lr
ON lr.song_id = so.song_id
GROUP BY si.singer_id,si.singer_name,ai.album_id,ai.album_name
HAVING play_count = 0
SELECT *
FROM numbers_for_fun
WHERE (a = 0 AND b = 0 AND c <= 0)
OR (a = 0 AND b > 0 AND c <= 0)
OR (a < 0 AND -b/2*a >= 0 AND c <= 0)
OR (a < 0 AND -b/2*a < 0 AND 4*a*c-b*b >= 0)
ORDER BY id
SELECT *
FROM numbers_for_fun
WHERE (a = 0 AND b = 0 AND c <= 0)
OR (a = 0 AND b > 0 AND c <= 0)
OR (a < 0 AND -b/2*a >= 0 AND c <= 0)
OR (a < 0 AND -b/2*a < 0 AND 4*a*c-b*b <= 0)
ORDER BY id
这样直接一个查询的话,最后结果小数部分和参考答案跑出来的有些差异,我前几题都这么写,前一题试了下小数保留4位,两种写法结果是一样的 SELECT CAST(SUM(CASE WHEN pay_amount = 144 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS yearly, CAST(SUM(CASE WHEN pay_amount = 90 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS half_yearly, CAST(SUM(CASE WHEN pay_amount = 51 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS quarterly, CAST(SUM(CASE WHEN pay_amount = 19 THEN pay_amount / (DATEDIFF(end_date,begin_date)+1) ELSE NULL END) AS DECIMAL(10,2)) AS monthly, CAST(SUM(pay_amount / (DATEDIFF(end_date,begin_date)+1)) AS DECIMAL(10,2)) AS total_income FROM bilibili_m2 AS m2 LEFT JOIN bilibili_m1 AS m1 ON m2.m_date BETWEEN m1.begin_date AND m1.end_date WHERE m2.m_date RLIKE '^2021'WITH usr_act AS ( SELECT usr_id,live_id,enter_time AS event_time ,1 AS act FROM ks_live_t1 UNION ALL SELECT usr_id,live_id,leave_time AS event_time, -1 AS act FROM ks_live_t1 /* FROM 子句不要忘写 */ ), live_online_users AS ( SELECT live_id,event_time, SUM(act) OVER(PARTITION BY live_id ORDER BY event_time) AS online_users FROM usr_act ), live_online_users_rnk AS ( /* 也可以利用窗口函数 MAX() OVER() */ SELECT live_id,event_time,online_users, RANK() OVER(PARTITION BY live_id ORDER BY online_users DESC) AS rnk FROM live_online_users ), live_online_user_peak AS ( SELECT live_id, event_time,online_users FROM live_online_users_rnk WHERE rnk = 1 ), /* 本来想用FIRST_VALUE() OVER() 和 LAST_VALUE() OVER()的,不过分区内多行的话,好像不能像GROUP BY 那样轻易变成1行 */ live_online_user_peak_time AS ( SELECT live_id,online_users AS max_online_users, MIN(event_time) AS first_peak_time, MAX(event_time) AS last_peak_time FROM live_online_user_peak GROUP BY live_id,online_users ) SELECT t1.live_id,t2.live_nm,t1.max_online_users, t1.first_peak_time, t1.last_peak_time FROM live_online_user_peak_time AS t1 LEFT JOIN ks_live_t2 AS t2 ON t1.live_id = t2.live_id ORDER BY max_online_users DESCWITH usr_typ AS ( SELECT t20.usr_id,t20.v_id,t20.v_tm,t3.v_typ FROM bilibili_t20 AS t20 LEFT JOIN bilibili_t3 AS t3 ON t20.v_id = t3.v_id WHERE DATE(t20.v_tm) >= '2021-02-05' /* 或者用 t20.v_tm RLIKE '^2021-02-0[5-8]' */ ) SELECT ta.v_typ, COUNT(DISTINCT ta.usr_id) AS total_views, COUNT(DISTINCT tb.usr_id) AS retained_users, CAST(COUNT(DISTINCT tb.usr_id) / COUNT(DISTINCT ta.usr_id)*100 AS decimal(5,2)) AS retention_rate FROM usr_typ AS ta LEFT JOIN usr_typ AS tb ON ta.usr_id = tb.usr_id AND DATEDIFF(tb.v_tm,ta.v_tm) BETWEEN 1 AND 3 WHERE ta.v_tm RLIKE '^2021-02-05' GROUP BY ta.v_typ ORDER BY retention_rate DESCWITH new_user AS ( SELECT usr_id,MIN(v_date) AS first_login FROM bilibili_t100 GROUP BY usr_id ) SELECT nu.first_login AS login_date, COUNT(DISTINCT nu.usr_id) AS new_users, COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) AS new_members, CAST(COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) / COUNT(DISTINCT nu.usr_id)*100 AS decimal(5,2)) AS conversion_rate FROM new_user AS nu LEFT JOIN bilibili_t100 AS t ON nu.usr_id = t.usr_id AND nu.first_login = t.v_date GROUP BY login_date ORDER BY login_dateWITH new_user AS ( SELECT usr_id,MIN(v_date) AS first_login FROM bilibili_t100 GROUP BY usr_id ) SELECT nu.first_login AS login_date, COUNT(DISTINCT nu.usr_id) AS new_users, COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) AS new_members, CAST(COUNT(DISTINCT CASE WHEN t.m_flg = 1 THEN nu.usr_id ELSE NULL END) / COUNT(DISTINCT nu.usr_id)*100 AS decimal(5,2)) AS conversion_rate FROM new_user AS nu LEFT JOIN bilibili_t100 AS t ON nu.usr_id = t.usr_id AND nu.first_login = t.v_date GROUP BY login_date ORDER BY login_dateSELECT cust_uid FROM mt_trx_rcd1 GROUP BY cust_uid HAVING COUNT(DISTINCT CASE WHEN mch_nm IN ('庄家界(千灯店)','黄记烘培宫廷桃酥王') THEN mch_nm ELSE NULL END) = 2忘了第一次的时候是怎么写的。。。反正现在跑出来的和参考答案不一样 SELECT st.student_id, st.name, sc.score, ROW_NUMBER() OVER(ORDER BY sc.score DESC) AS rnk FROM students AS st LEFT JOIN scores AS sc ON st.student_id = sc.student_id WHERE subject = '物理' AND grade_code = 'S1' LIMIT 10 发现ROW_NUMBER() OVER() 里PARTITION BY grade_code 的有无,好像会影响成绩相同时的排序WITH user_login_date AS ( SELECT DISTINCT usr_id, DATE(login_time) AS login_date FROM user_login_log WHERE /* DATE(login_time) >= DATE_SUB(CURDATE(), INTERVAL 30 DAY) */ DATEDIFF(CURDATE(), DATE(login_time)) <= 30 /* DATEDIFF() 能比 DATE_SUB()、DATE_ADD() 少写几个字符…… */ ) SELECT ta.login_date AS first_login_date, CAST(COUNT(tb.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_1_retention_rate, CAST(COUNT(tc.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_3_retention_rate, CAST(COUNT(td.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_7_retention_rate, CAST(COUNT(te.usr_id) / COUNT(*)*100 AS decimal(10,2)) AS t_plus_14_retention_rate FROM user_login_date AS ta LEFT JOIN user_login_date AS tb ON ta.usr_id = tb.usr_id AND DATEDIFF(tb.login_date, ta.login_date) = 1 LEFT JOIN user_login_date AS tc ON ta.usr_id = tc.usr_id AND DATEDIFF(tc.login_date, ta.login_date) = 3 LEFT JOIN user_login_date AS td ON ta.usr_id = td.usr_id AND DATEDIFF(td.login_date, ta.login_date) = 7 LEFT JOIN user_login_date AS te ON ta.usr_id = te.usr_id AND DATEDIFF(te.login_date, ta.login_date) = 14 GROUP BY ta.login_date ORDER BY ta.login_date9分这位并列第二的店有三家,也就是说前三要在这三家里取二, 我这里跑出来是取了秀水餐厅、黄记烘培宫廷桃酥王,和参考答案出来的不一样,只有一个排序条件下相同值究竟怎么排的。。。 WITH rfm_inf AS ( SELECT cust_uid AS user_id, DATEDIFF(CURDATE(), MAX(trx_dt)) AS re, COUNT(DISTINCT trx_dt) AS fre, AVG(trx_amt) AS mo FROM mt_trx_rcd_f GROUP BY user_id ORDER BY user_id ), rfm_score AS ( SELECT user_id, NTILE(3) OVER(ORDER BY re DESC) AS recency_score, NTILE(3) OVER(ORDER BY fre) AS frequency_score, NTILE(3) OVER(ORDER BY mo DESC) AS monetary_score FROM rfm_inf ORDER BY user_id ), mch_nm_rnk AS ( SELECT rs.*,mf.mch_nm, COUNT(*) AS cnt, ROW_NUMBER() OVER(PARTITION BY rs.user_id ORDER BY COUNT(*) DESC) AS rnk FROM rfm_score AS rs LEFT JOIN mt_trx_rcd_f AS mf ON rs.user_id = mf.cust_uid WHERE recency_score + frequency_score + monetary_score = 9 GROUP BY rs.user_id, mf.mch_nm,rs.recency_score,rs.frequency_score,rs.monetary_score ORDER BY rs.user_id,rnk ) SELECT user_id, recency_score, frequency_score,monetary_score, CONCAT(MAX(CASE WHEN rnk = 1 THEN mch_nm ELSE NULL END),', ', MAX(CASE WHEN rnk = 2 THEN mch_nm ELSE NULL END),', ', MAX(CASE WHEN rnk = 3 THEN mch_nm ELSE NULL END)) AS Top3_mch_nm FROM mch_nm_rnk GROUP BY user_id不用GROUP_CONCAT的话, 可以用CONCAT(MAX(CASE WHEN rnk = 1 THEN mch_nm ELSE NULL END),', ', MAX(CASE WHEN rnk = 2 THEN mch_nm ELSE NULL END),', ', MAX(CASE WHEN rnk = 3 THEN mch_nm ELSE NULL END)) 或者加两个新列LEAD(,1) OVER()、LEAD(,2) OVER(),再用CONCAT(); 当然GROUP_CONCAT确实方便许多重新试了下,单独求frequency,10017分到1、10021分到2 SELECT cust_uid AS user_id, COUNT(DISTINCT trx_dt) AS fre, NTILE(3) OVER(ORDER BY COUNT(DISTINCT trx_dt)) AS frequency_score FROM mt_trx_rcd_f GROUP BY user_id ORDER BY user_id 不过我是三值一起求的,就反过来了 SELECT cust_uid AS user_id, /* DATEDIFF(CURDATE(), MAX(trx_dt)) AS re,*/ NTILE(3) OVER(ORDER BY DATEDIFF(CURDATE(), MAX(trx_dt)) DESC) AS recency_score, /* COUNT(DISTINCT trx_dt) AS fre,*/ NTILE(3) OVER(ORDER BY COUNT(DISTINCT trx_dt)) AS frequency_score, /* AVG(trx_amt) AS mo,*/ NTILE(3) OVER(ORDER BY AVG(trx_amt)) AS monetary_score FROM mt_trx_rcd_f GROUP BY user_id ORDER BY user_idWITH 用太多会不会不太好,性能会不会有明显差异 WITH fvck_typ_cnt AS ( SELECT DATE_FORMAT(trx_time,'%Y-%m-%d') AS date_value, COUNT(*) AS FvckCnt, SUM(CASE WHEN trx_amt = 288 THEN 1 ELSE 0 END) AS WithHand, SUM(CASE WHEN trx_amt = 388 THEN 1 ELSE 0 END) AS WithBalls, SUM(CASE WHEN trx_amt = 588 THEN 1 ELSE 0 END) AS BlowJobbie, SUM(CASE WHEN trx_amt = 888 THEN 1 ELSE 0 END) AS Doi, SUM(CASE WHEN trx_amt = 1288 THEN 1 ELSE 0 END) AS DoubleFly FROM cmb_usr_trx_rcd WHERE usr_id = 5201314520 AND trx_time RLIKE '^2024-09' AND mch_nm RLIKE '(按摩|保健|休闲|会所)' GROUP BY date_value ORDER BY date_value ), date2409 AS ( SELECT date_value FROM date_table WHERE date_value RLIKE '2024-09' ), ohya AS ( SELECT *, LEAD(trx_amt,1) OVER(PARTITION BY DATE_FORMAT(trx_time,'%Y-%m-%d') ORDER BY trx_time) AS second FROM cmb_usr_trx_rcd WHERE usr_id = 5201314520 AND trx_time RLIKE '2024-09' AND mch_nm RLIKE '(按摩|保健|休闲|会所)' ), ohya_cnt AS ( SELECT DATE_FORMAT(trx_time,'%Y-%m-%d') AS date_value, COUNT(*) AS Ohya FROM ohya WHERE trx_amt = 888 AND second = 1288 GROUP BY date_value ) SELECT da.date_value, COALESCE(fv.FvckCnt,0) AS FvckCnt, COALESCE(fv.WithHand,0) AS WithHand, COALESCE(fv.WithBalls,0) AS WithBalls, COALESCE(fv.BlowJobbie,0) AS BlowJobbie, COALESCE(fv.Doi,0) AS Doi, COALESCE(fv.DoubleFly,0) AS DoubleFly, COALESCE(oh.Ohya,0) AS Ohya FROM date2409 AS da LEFT JOIN fvck_typ_cnt AS fv ON da.date_value = fv.date_value LEFT JOIN ohya_cnt AS oh ON da.date_value = oh.date_value先求出每季度各项目的次数(辅助列),再用SUM() OVER(),再把第一步注释掉,就是trx_quarter的代码在窗口函数里要重复写两遍,看起来有点繁琐 SELECT CONCAT(YEAR(trx_time),'-Q',QUARTER(trx_time)) AS trx_quarter, /* SUM(CASE WHEN trx_amt = 288 THEN 1 ELSE 0 END) AS withhand1,*/ SUM(SUM(CASE WHEN trx_amt = 288 THEN 1 ELSE 0 END)) OVER(ORDER BY CONCAT(YEAR(trx_time),'-Q',QUARTER(trx_time))) AS withhand, /* SUM(CASE WHEN trx_amt = 888 THEN 1 ELSE 0 END) AS doi1,*/ SUM(SUM(CASE WHEN trx_amt = 888 THEN 1 ELSE 0 END)) OVER(ORDER BY CONCAT(YEAR(trx_time),'-Q',QUARTER(trx_time))) AS doi FROM cmb_usr_trx_rcd WHERE usr_id = 5201314520 AND mch_nm = '红玫瑰按摩保健休闲' AND trx_time RLIKE '^202[34]' GROUP BY trx_quarter ORDER BY trx_quarter如果是TIMESTAMP类型或者DATETIME类型,即YYYY-MM-DD HH:MM:SS格式,TIMESTAMPDIFF()和DATEDIFF()计算天数差可能会存在差异 DATEDIFF() 会先截取前面的DATE在直接计算差值 TIMESTAMPDIFF() 是先求时间差值,再以24小时为一个单位向下取整,相差23时59分59秒也是0天 SELECT DATEDIFF('2025-02-07 14:13:20', '2025-02-06 14:13:21') , TIMESTAMPDIFF(DAY,'2025-02-06 14:13:21','2025-02-07 14:13:20') SELECT DATEDIFF('2025-02-07 00:00:00', '2025-02-06 23:59:59') , TIMESTAMPDIFF(DAY,'2025-02-06 23:59:59','2025-02-07 00:00:00')参照前面哥德堡老哥的思路写一个(我自己觉得)相对好理解的 SELECT mch_nm FROM cmb_usr_trx_rcd WHERE trx_time RLIKE '2024' GROUP BY mch_nm HAVING COUNT( DISTINCT CASE WHEN usr_id IN (5201314520, 5211314521) THEN usr_id ELSE NULL END ) = 2 ORDER BY mch_nm DESC我这么写跑出来的结果和参看答案出来的应该是一样的(用文本比较软件比对过),不过最后还是提示有误,是哪里出问题了呢 WITH class_score_cnt AS ( SELECT st.class_code, COUNT(*) AS total_students, SUM(CASE WHEN sc.score >= 110 THEN 1 ELSE 0 END) AS excellent_cnt, SUM(CASE WHEN sc.score >= 90 AND sc.score < 110 THEN 1 ELSE 0 END) AS good_cnt, SUM(CASE WHEN sc.score >= 60 AND sc.score < 90 THEN 1 ELSE 0 END) AS pass_cnt, SUM(CASE WHEN sc.score < 60 THEN 1 ELSE 0 END) AS fail_cnt FROM scores AS sc LEFT JOIN students AS st ON sc.student_id = st.student_id WHERE sc.exam_date = '2024-06-30' AND sc.subject = '数学' GROUP BY st.class_code ) SELECT class_code, total_students, CONCAT(excellent_cnt,', ',CAST(excellent_cnt/total_students*100 AS decimal(4,2)),'%') AS excellent, CONCAT(good_cnt,', ',CAST(good_cnt/total_students*100 AS decimal(4,2)),'%') AS good, CONCAT(pass_cnt,', ',CAST(pass_cnt/total_students*100 AS decimal(4,2)),'%') AS pass, CONCAT(fail_cnt,', ',CAST(fail_cnt/total_students*100 AS decimal(4,2)),'%') AS fail FROM class_score_cnt ORDER BY class_codeWITH fav_gd AS ( SELECT DISTINCT mch_id FROM xhs_fav_rcd ), pchs_gd AS ( SELECT DISTINCT mch_id FROM xhs_pchs_rcd ) SELECT gd.*, CASE WHEN fav.mch_id is NOT NULL AND pchs.mch_id IS NOT NULL THEN 'Collected and Purchased' WHEN fav.mch_id is NOT NULL AND pchs.mch_id IS NULL THEN 'Only Collected Not Purchased' WHEN fav.mch_id is NULL AND pchs.mch_id IS NOT NULL THEN 'Only Purchased Not Collected' ELSE 'Neither Collected NOR Purchased' END AS category FROM gd_inf AS gd LEFT JOIN fav_gd AS fav ON fav.mch_id = gd.gd_id LEFT JOIN pchs_gd AS pchs ON pchs.mch_id = gd.gd_id ORDER BY gd.gd_id