输出的是和输出示例是一样的呀,问题在哪?
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
, format(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+')), 2)as guangzhou
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
记录一下第一次算峰值,想了很久,不知道还有没有其他的简单方法
with data1 as(
select usr_id, live_id, enter_time as time1, 1 as flag1
from ks_live_t1
union all
select usr_id, live_id, leave_time as time1, -1 as flag1
from ks_live_t1
),
data2 as(
select
live_id
, sum(flag1) over(partition by live_id order by time1 ) as online_users
from data1
)
select t1.live_id,t2.live_nm, max(online_users) as max_online_users
from data2 t1 left join ks_live_t2 t2 on t1.live_id = t2.live_id
group by 1, 2
order by 3 desc
如果活跃的定义就是登录的话,那么只需要判断最后一次登录时间是否 <=2024-10-01 不就可以吗
select count(1) as inactive_user_count
from(
select usr_id, max(login_time)
from user_login_log
group by 1
having max(login_time) <= '2024-10-01'
) tmp
with user_login_date as (
select
usr_id
, date_format(login_time, '%Y-%m-%d') login_date
from user_login_log
where login_time >= date_sub(curdate(), interval 180 day)
group by 1, 2
),
usr_login_cnt as(
select usr_id, count(1) cnt
from user_login_date
group by usr_id
)
select
sum(case when cnt <= 5 then 1 else 0 end) days_1_to_5
, sum(case when cnt > 5 and cnt <= 10 then 1 else 0 end) days_6_to_10
, sum(case when cnt > 10 and cnt <= 20 then 1 else 0 end) days_11_to_20
, sum(case when cnt > 20then 1 else 0 end) days_over_20
from usr_login_cnt
with user_login_pre_month as(
select
usr_id
, login_time
, case
when date_format(login_time,'%H:%i:%s') >= '07:30:00' and date_format(login_time,'%H:%i:%s') <= '09:30:00' then 'commute'
when date_format(login_time,'%H:%i:%s') >= '18:30:00' and date_format(login_time,'%H:%i:%s') <= '20:30:00' then 'commute'
when date_format(login_time,'%H:%i:%s') >= '11:30:00' and date_format(login_time,'%H:%i:%s') <= '14:00:00' then 'lunch break'
when date_format(login_time,'%H:%i:%s') >= '22:30:00' and date_format(login_time,'%H:%i:%s') <= '23:59:59' then 'bedtime'
when date_format(login_time,'%H:%i:%s') >= '00:00:00' and date_format(login_time,'%H:%i:%s') <= '01:00:00' then 'bedtime'
else null end time_1
from user_login_log
where
date_format(login_time, '%Y-%m')= date_format(date_sub(curdate(), interval 1 month), '%Y-%m')
)
select
count(distinct(case when time_1 = 'commute' then usr_id else null end )) `commute`
, count(distinct(case when time_1 = 'lunch break' then usr_id else null end )) `lunch break`
, count(distinct(case when time_1 = 'bedtime' then usr_id else null end )) `bedtime`
from user_login_pre_month
WITH user_login_date AS (
select usr_id
, DATE_FORMAT(login_time, '%Y-%m-%d') login_date
, row_number()over(partition by usr_id order by login_time) rn
from user_login_log
)
select login_date as first_login_date, count(distinct usr_id) cnt
from user_login_date
where rn = 1 and login_date>= '2024-09-01' and login_date< '2024-10-01'
group by login_date
WITH user_login_date AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_date,
DATE_FORMAT(DATE_ADD(DATE_FORMAT(login_time, '%Y-%m-01'), INTERVAL 1 MONTH), '%Y-%m-01') AS next_month,
row_number() over(partition by usr_id order by login_time ) rn
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-02-01'
),
new_user_login_date as(
selectdistinct usr_id, login_date, next_month
from user_login_date
where rn = 1)
select
t1.login_date as current_month,
CAST(
CASE
WHEN COUNT(DISTINCT t1.usr_id) = 0 THEN 0
ELSE COUNT(DISTINCT t2.usr_id) * 100.0 / COUNT(DISTINCT t1.usr_id)
END AS DECIMAL(10, 2)
) AS t_plus_1_month_retention_rate
from new_user_login_date t1 left join user_login_date t2 on t1.usr_id = t2.usr_id and t1.next_month = t2.login_date
group by t1.login_date
limit 12
WITH user_login_date AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_date,
DATE_FORMAT(DATE_ADD(DATE_FORMAT(login_time, '%Y-%m-01'), INTERVAL 1 MONTH), '%Y-%m-01') AS next_month,
row_number() over(partition by usr_id order by login_time ) rn
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-02-01'
),
new_user_login_date as(
selectdistinct usr_id, login_date, next_month
from user_login_date
where rn = 1)
select
t1.login_date as current_month,
CAST(
CASE
WHEN COUNT(DISTINCT t1.usr_id) = 0 THEN 0
ELSE COUNT(DISTINCT t2.usr_id) * 100.0 / COUNT(DISTINCT t1.usr_id)
END AS DECIMAL(10, 2)
) AS t_plus_1_month_retention_rate
from new_user_login_date t1 left join user_login_date t2 on t1.usr_id = t2.usr_id and t1.next_month = t2.login_date
group by t1.login_date
limit 11
WITH user_login_date AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_date,
DATE_FORMAT(DATE_ADD(DATE_FORMAT(login_time, '%Y-%m-01'), INTERVAL 1 MONTH), '%Y-%m-01') AS next_month,
row_number() over(partition by usr_id order by login_time ) rn
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-02-01'
),
new_user_login_date as(
selectdistinct usr_id, login_date, next_month
from user_login_date
where rn = 1)
select
t1.login_date as current_month,
CAST(
CASE
WHEN COUNT(DISTINCT t1.usr_id) = 0 THEN 0
ELSE COUNT(DISTINCT t2.usr_id) * 100.0 / COUNT(DISTINCT t1.usr_id)
END AS DECIMAL(10, 2)
) AS t_plus_1_month_retention_rate
from new_user_login_date t1 left join user_login_date t2 on t1.usr_id = t2.usr_id and t1.next_month = t2.login_date
group by t1.login_date
WITH user_login_date AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_date,
DATE_FORMAT(DATE_ADD(DATE_FORMAT(login_time, '%Y-%m-01'), INTERVAL 1 MONTH), '%Y-%m-01') AS next_month
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-02-01'
)
SELECT
t1.login_date AS current_month,
CAST(
CASE
WHEN COUNT(DISTINCT t1.usr_id) = 0 THEN 0
ELSE COUNT(DISTINCT t2.usr_id) * 100.0 / COUNT(DISTINCT t1.usr_id)
END AS DECIMAL(10, 2)
) AS t_plus_1_month_retention_rate
FROM
user_login_date t1
LEFT JOIN
user_login_date t2
ON t1.usr_id = t2.usr_id
AND t2.login_date = t1.next_month
GROUP BY
t1.login_date
ORDER BY
t1.login_date
limit 11
WITH user_login_date AS (
SELECT DISTINCT usr_id, DATE(login_time) AS login_date
FROM user_login_log
WHERE DATEDIFF(CURDATE(), DATE(login_time)) <= 90
)
select
t1.login_date as first_login_date
, cast( (count(DISTINCT t3.usr_id) / count(DISTINCT t1.usr_id) * 100) as decimal(10, 2)) as t_plus_3_retention_rate
, cast( (count(DISTINCT t4.usr_id) / count(DISTINCT t1.usr_id) * 100) as decimal(10, 2)) as t_plus_7_retention_rate
, cast( (count(DISTINCT t5.usr_id) / count(DISTINCT t1.usr_id) * 100) as decimal(10, 2)) as t_plus_14_retention_rate
from user_login_date t1
left join user_login_date t3 on t1.usr_id = t3.usr_id and datediff(t3.login_date, t1.login_date) between 1 and 3
left join user_login_date t4 on t1.usr_id = t4.usr_id and datediff(t4.login_date, t1.login_date) between 1 and 7
left join user_login_date t5 on t1.usr_id = t5.usr_id and datediff(t5.login_date, t1.login_date) between 1 and 14
group by 1
order by 1
WITH user_login_date AS (
SELECT DISTINCT usr_id, DATE(login_time) AS login_date
FROM user_login_log
WHERE DATEDIFF(CURDATE(), DATE(login_time)) <= 90
)
select
t1.login_date as first_login_date
, cast( (count(t3.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_3_retention_rate
, cast( (count(t4.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_7_retention_rate
, cast( (count(t5.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_14_retention_rate
from user_login_date t1
left join user_login_date t3 on t1.usr_id = t3.usr_id and datediff(t3.login_date, t1.login_date) between 2 and 3
left join user_login_date t4 on t1.usr_id = t4.usr_id and datediff(t4.login_date, t1.login_date) between 2 and 7
left join user_login_date t5 on t1.usr_id = t5.usr_id and datediff(t5.login_date, t1.login_date) between 2 and 14
group by 1
order by 1
WITH user_login_date AS (
SELECT DISTINCT usr_id, DATE(login_time) AS login_date
FROM user_login_log
WHERE DATEDIFF(CURDATE(), DATE(login_time)) <= 90
)
select
t1.login_date as first_login_date
, cast( (count(t3.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_3_retention_rate
, cast( (count(t4.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_7_retention_rate
, cast( (count(t5.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_14_retention_rate
from user_login_date t1
left join user_login_date t3 on t1.usr_id = t3.usr_id and datediff(t3.login_date, t1.login_date) between 1 and 3
left join user_login_date t4 on t1.usr_id = t4.usr_id and datediff(t4.login_date, t1.login_date) between 1 and 7
left join user_login_date t5 on t1.usr_id = t5.usr_id and datediff(t5.login_date, t1.login_date) between 1 and 14
group by 1
order by 1
WITH user_login_date AS (
SELECT DISTINCT usr_id, DATE(login_time) AS login_date
FROM user_login_log
WHERE DATEDIFF(CURDATE(), DATE(login_time)) <= 30
)
select
t1.login_date as first_login_date
, cast( (count(t2.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_1_retention_rate
, cast( (count(t3.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_3_retention_rate
, cast( (count(t4.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_7_retention_rate
, cast( (count(t5.usr_id) / count(*) * 100) as decimal(10, 2)) as t_plus_14_retention_rate
from user_login_date t1
left join user_login_date t2 on t1.usr_id = t2.usr_id and datediff(t2.login_date, t1.login_date) = 1
left join user_login_date t3 on t1.usr_id = t3.usr_id and datediff(t3.login_date, t1.login_date) = 3
left join user_login_date t4 on t1.usr_id = t4.usr_id and datediff(t4.login_date, t1.login_date) = 7
left join user_login_date t5 on t1.usr_id = t5.usr_id and datediff(t5.login_date, t1.login_date) = 14
group by 1
order by 1
select
year(dt) as Y
, cast(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as beijing
, cast(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as shanghai
, cast(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as shenzhen
, cast(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+'))as decimal(4, 2))as guangzhou
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
year(dt) as `year`
,cast(avg(REGEXP_SUBSTR(tmp_h, '-?[0-9]+')) as decimal(4, 2))as avg_tmp_h
,CASE
WHEN ABS(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)) -
LAG(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)), 1) OVER (ORDER BY year(dt))) >= 1
THEN 'Yes'
ELSE 'No'
END as significant_change
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city = 'shenzhen'
group by 1
order by 1
WITH yearly_avg AS (
SELECT
city,
YEAR(dt) AS year,
AVG(REPLACE(tmp_h, '℃', '') + 0) AS avg_high_temp
FROM weather_rcd_china
WHERE city = 'shenzhen'
AND dt BETWEEN '2011-01-01' AND '2022-12-31'
GROUP BY city, YEAR(dt)
),
yearly_avg_with_lag AS (
SELECT
city,
year,
avg_high_temp,
LAG(avg_high_temp) OVER (ORDER BY year) AS prev_year_avg_temp
FROM yearly_avg
),
temp_changes AS (
SELECT
year,
avg_high_temp,
prev_year_avg_temp,
(avg_high_temp - COALESCE(prev_year_avg_temp, 0)) AS temp_change
FROM yearly_avg_with_lag
)
SELECT
year,
cast(avg_high_temp as decimal(10,2)) as avg_tmp_h,
CASE
WHEN ABS(avg_high_temp - COALESCE(prev_year_avg_temp, 0)) >=1 THEN 'Yes'
ELSE 'No'
END AS significant_change
FROM temp_changes
WHERE year BETWEEN 2011 AND 2022
ORDER BY year;
select
year(dt) as `year`
, format(avg(REGEXP_SUBSTR(tmp_h, '-?[0-9]+')), 2)as avg_tmp_h
,CASE
WHEN ABS(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)) -
LAG(AVG(CAST(REGEXP_SUBSTR(tmp_h, '-?[0-9]+') AS DECIMAL)), 1) OVER (ORDER BY year(dt))) >= 1
THEN 'Yes'
ELSE 'No'
END as significant_change
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city = 'shenzhen'
group by 1
order by 1
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
, format(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+')), 2)as '广州'
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
year(dt) as Y
, format(avg(REGEXP_SUBSTR(if(city = 'beijing', tmp_h, null), '-?[0-9]+')), 2)as beijing
, format(avg(REGEXP_SUBSTR(if(city = 'shanghai', tmp_h, null), '-?[0-9]+')), 2)as shanghai
, format(avg(REGEXP_SUBSTR(if(city = 'shenzhen', tmp_h, null), '-?[0-9]+')), 2)as shenzhen
, format(avg(REGEXP_SUBSTR(if(city = 'guangzhou', tmp_h, null), '-?[0-9]+')), 2)as guangzhou
from weather_rcd_china
where
year(dt) between 2011 and 2022
and city in ('beijing','shanghai','shenzhen','guangzhou')
group by 1
order by 1
select
city
, sum(case when con like '%雪%' then 1 else 0 end) as snowy_days
from weather_rcd_china
where month(dt) in (12, 1, 2)
group by 1
order by 2 desc