with new_map as(select loc_nm
,caseloc_nm
when "北京机床研究所" then "写字楼"
when "将台西" then "地铁站"
else loc_type
end as loc_type1
from gd_loc_map)
, bikedateloc as(
select b.id,b.user_id,date(b.start_time) dt
,cast(substring(date_format(start_time,'%Y-%m'),1,4)as unsigned)*100
+cast(substring(date_format(start_time,'%Y-%m'),6,2)as unsigned) as ym
,end_loc,start_loc
, g1.loc_type1 as starttype
,g2.loc_type1 as endtype
from hello_bike_riding_rcdb
left join new_map g1
on b.start_loc=g1.loc_nm
left join new_map g2
on b.end_loc=g2.loc_nm
where YEAR(start_time) between 2020 and 2024
)
,datecnt as(
select user_id
,ym
,count(distinct dt) as cnt
from bikedateloc
where starttype != endtype
and endtype in('地铁站','写字楼')
and starttype in('地铁站','写字楼')
group by 1,2
having cnt>=5
order by 1,2
)
,
lianxu as(
select
user_id
,ym
,cnt
,lag(ym)over(partition by user_id
order byym) as lastmonth
,lag(ym,2)over(partition by user_id
order byym) as lastmonth2
from datecnt
group by 1,2)
,tagbike as(
select user_id,ym,
((ym-lastmonth=1) and(lastmonth-lastmonth2=1))
as tag
from lianxu
)
,
active_users as(
select * from(
select user_id,max(coalesce(tag,0)) as active_tag
from tagbike
group by 1)a
where active_tag=1)
SELECT
u.user_id,
IF(a.user_id IS NOT NULL, 1, 0) AS active_tag
FROM (SELECT DISTINCT user_id FROM hello_bike_riding_rcd) u
LEFT JOIN active_users a ON u.user_id = a.user_id
;
with new_map as(select loc_nm
,caseloc_nm when "凯德广场" then "购物广场"
when "北京机床研究所" then "写字楼"
when "将台西" then "地铁站"
else loc_type
end as loc_type1
from gd_loc_map)
, bikedateloc as(
select b.id,b.user_id,date(b.start_time) dt
,cast(substring(date_format(start_time,'%Y-%m'),1,4)as unsigned)*100
+cast(substring(date_format(start_time,'%Y-%m'),6,2)as unsigned) as ym
,end_loc,start_loc
, g1.loc_type1 as starttype
,g2.loc_type1 as endtype
from hello_bike_riding_rcdb
left join new_map g1
on b.start_loc=g1.loc_nm
left join new_map g2
on b.end_loc=g2.loc_nm
where YEAR(start_time) between 2020 and 2024
)
,datecnt as(
select user_id
,ym
,count(distinct dt) as cnt
from bikedateloc
where starttype != endtype
and endtype in('地铁站','写字楼')
and starttype in('地铁站','写字楼')
group by 1,2
having cnt>=5
order by 1,2
)
,
lianxu as(
select
user_id
,ym
,cnt
,lag(ym)over(partition by user_id
order byym) as lastmonth
,lag(ym,2)over(partition by user_id
order byym) as lastmonth2
from datecnt
group by 1,2)
,tagbike as(
select user_id,ym,
((ym-lastmonth=1) and(lastmonth-lastmonth2=1))
as tag
from lianxu
)
,
active_users as(
select * from(
select user_id,max(coalesce(tag,0)) as active_tag
from tagbike
group by 1)a
where active_tag=1)
SELECT
u.user_id,
IF(a.user_id IS NOT NULL, 1, 0) AS active_tag
FROM (SELECT DISTINCT user_id FROM hello_bike_riding_rcd) u
LEFT JOIN active_users a ON u.user_id = a.user_id
;
with new_map as(select loc_nm
,caseloc_nm when "凯德广场" then "购物广场"
when "北京机床研究所" then "写字楼"
when "将台西" then "地铁站"
else loc_type
end as loc_type1
from gd_loc_map)
, bikedateloc as(
select b.id,b.user_id,date(b.start_time) dt
,cast(substring(date_format(start_time,'%Y-%m'),1,4)as unsigned)*100
+cast(substring(date_format(start_time,'%Y-%m'),6,2)as unsigned) as ym
,end_loc,start_loc
, g1.loc_type1 as starttype
,g2.loc_type1 as endtype
from hello_bike_riding_rcdb
left join new_map g1
on b.start_loc=g1.loc_nm
left join new_map g2
on b.end_loc=g2.loc_nm
where YEAR(start_time) between 2020 and 2024
)
,datecnt as(
select user_id
,ym
,count(distinct dt) as cnt
from bikedateloc
where starttype != endtype
and endtype in('地铁站','写字楼')
and starttype in('地铁站','写字楼')
group by 1,2
having cnt>=5
order by 1,2
)
,
lianxu as(
select
user_id
,ym
,cnt
,lag(ym)over(partition by user_id
order byym) as lastmonth
,lag(ym,2)over(partition by user_id
order byym) as lastmonth2
from datecnt
group by 1,2)
,tagbike as(
select user_id,ym,
((ym-lastmonth=1) and(lastmonth-lastmonth2=1))
as tag
from lianxu
)
select user_id,max(coalesce(tag,0)) as active_tag
from tagbike
group by 1
with new_map as(select loc_nm
,caseloc_nm when "凯德广场" then "购物广场"
when "北京机床研究所" then "写字楼"
when "将台西" then "地铁站"
else loc_type
end as loc_type
from gd_loc_map)
,
commute_records AS (
SELECT
r.user_id,
DATE(r.start_time) AS ride_date,
DATE_FORMAT(r.start_time, '%Y-%m') AS month_str
FROM hello_bike_riding_rcd r
JOIN new_map s ON r.start_loc = s.loc_nm
JOIN new_map e ON r.end_loc = e.loc_nm
WHERE
YEAR(r.start_time) BETWEEN 2020 AND 2024
AND (
(s.loc_type = '地铁站' AND e.loc_type = '写字楼')
OR (s.loc_type = '写字楼' AND e.loc_type = '地铁站')
)
GROUP BY r.user_id, DATE(r.start_time), month_str
),
active_months AS (
SELECT
user_id,
month_str,
COUNT(DISTINCT ride_date) AS commute_days
FROM commute_records
GROUP BY user_id, month_str
HAVING commute_days >= 5
),
monthly_data AS (
SELECT
user_id,
month_str,
CAST(REPLACE(month_str, '-', '') AS UNSIGNED) AS month_num
FROM active_months
),
consecutive_check AS (
SELECT
user_id,
month_num,
(month_num - LAG(month_num, 1) OVER (PARTITION BY user_id ORDER BY month_num) = 1)
AND (LAG(month_num, 1) OVER (PARTITION BY user_id ORDER BY month_num) - LAG(month_num, 2) OVER(PARTITION BY user_id ORDER BY month_num) = 1)
AS is_consecutive
FROM monthly_data
),
active_users AS (
SELECT DISTINCT user_id
FROM consecutive_check
WHERE is_consecutive = 1
)
SELECT
u.user_id,
IF(a.user_id IS NOT NULL, 1, 0) AS active_tag
FROM (SELECT DISTINCT user_id FROM hello_bike_riding_rcd) u
LEFT JOIN active_users a ON u.user_id = a.user_id;
with bikedateloc as(
select b.id,b.user_id,date(b.start_time) dt
,cast(substring(date_format(start_time,'%Y-%m'),1,4)as unsigned)*100
+cast(substring(date_format(start_time,'%Y-%m'),6,2)as unsigned) as ym
,end_loc,start_loc
, g1.loc_type as starttype
,g2.loc_type as endtype
from hello_bike_riding_rcdb
left join gd_loc_map g1
on b.start_loc=g1.loc_nm
left join gd_loc_map g2
on b.end_loc=g2.loc_nm
where YEAR(start_time) between 2020 and 2024
)
,datecnt as(
select user_id
,ym
,count(distinct dt) as cnt
from bikedateloc
where starttype != endtype
and endtype in('地铁站','写字楼')
and starttype in('地铁站','写字楼')
group by 1,2
having cnt>=5
order by 1,2
)
,
lianxu as(
select
user_id
,ym
,cnt
,lag(ym)over(partition by user_id
order byym) as lastmonth
,lag(ym,2)over(partition by user_id
order byym) as lastmonth2
from datecnt
group by 1,2)
,tagbike as(
select user_id,ym,
((ym-lastmonth=1) and(lastmonth-lastmonth2=1))
as tag
from lianxu
)
select user_id,max(coalesce(tag,0)) as active_tag
from tagbike
group by 1
with bikedateloc as(
select b.id,b.user_id,date(b.start_time) dt
,cast(substring(date_format(start_time,'%Y-%m'),1,4)as unsigned)*100
+cast(substring(date_format(start_time,'%Y-%m'),6,2)as unsigned) as ym
,end_loc,start_loc
, g1.loc_type as starttype
,g2.loc_type as endtype
from hello_bike_riding_rcdb
left join gd_loc_map g1
on b.start_loc=g1.loc_nm
left join gd_loc_map g2
on b.end_loc=g2.loc_nm
)
,datecnt as(
select user_id
,ym
,count(distinct dt) as cnt
from bikedateloc
where starttype != endtype
and endtype in('地铁站','写字楼')
and starttype in('地铁站','写字楼')
group by 1,2
having cnt>=5
order by 1,2
)
,
lianxu as(
select
user_id
,ym
,cnt
,lag(ym)over(partition by user_id
order byym) as lastmonth
,lag(ym,2)over(partition by user_id
order byym) as lastmonth2
from datecnt
group by 1,2)
,tagbike as(
select user_id,ym,
((ym-lastmonth=1) and(lastmonth-lastmonth2=1))
as tag
from lianxu
)
select user_id,max(coalesce(tag,0)) as active_tag
from tagbike
group by 1
with data1 as (
select distinct usr_id,
date(login_time) as login_date
from user_login_log
),
data2 as (
select usr_id,
login_date,
first_value(login_date) over (partition by usr_id order by login_date) as `start_date`,
lag(login_date) over (partition by usr_id order by login_date) as `pre_date`
from data1
),
data3 as (
selectdistinct data1.usr_id,
data1.login_date,
case
when pre_date is null and datediff(data1.login_date,start_date) = 0 then 'flag-1'
when pre_date is not null and datediff(data1.login_date,pre_date) <= 3 then 'flag-2'
else 'flag-3'
end as flag
from data1
left join data2
on (data1.usr_id,data1.login_date) = (data2.usr_id,data2.login_date)
),
data4 as (
select login_date,
round((sum(if(flag = 'flag-1', 1, 0)) / count(1)) * 100, 2) as rate_flag_1,
round((sum(if(flag = 'flag-2', 1, 0)) / count(1)) * 100, 2) as rate_flag_2,
round((sum(if(flag = 'flag-3', 1, 0)) / count(1)) * 100, 2) as rate_flag_3
from data3
group by login_date
)
select login_date,
concat_ws(', ', rate_flag_1, rate_flag_2, rate_flag_3) as pct
from data4
where year(login_date) = '2024'
group by login_date
order by login_date;
WITH
user_daily_login AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
GROUP BY usr_id, DATE(login_time)
),
user_first_login AS (
SELECT
usr_id,
MIN(login_date) AS first_login_date
FROM user_daily_login
GROUP BY usr_id
),
user_login_with_history AS (
SELECT
udl.usr_id,
udl.login_date,
ufl.first_login_date
FROM user_daily_login udl
JOIN user_first_login ufl
ON udl.usr_id = ufl.usr_id
),
user_login_status AS (
SELECT
usr_id,
login_date,
LAG(login_date) OVER (PARTITION BY usr_id ORDER BY login_date) AS last_login_before
FROM user_login_with_history ulwh
),
daily_summary AS (
SELECT
login_date,
COUNT(*) AS total_users,
SUM(CASE WHEN last_login_before IS NULL THEN 1 ELSE 0 END) AS new_users,
SUM(CASE WHEN DATEDIFF(login_date, last_login_before) BETWEEN 1 AND 3 THEN 1 ELSE 0 END) AS retained_users,
SUM(CASE WHEN DATEDIFF(login_date, last_login_before) >= 4 THEN 1 ELSE 0 END) AS returned_users
FROM user_login_status
WHERE login_date BETWEEN '2024-01-01' AND '2024-12-31'
GROUP BY login_date
)
SELECT
login_date,
concat_ws(', ',
ROUND(new_users * 100.0 / total_users, 2) ,
ROUND(retained_users * 100.0 / total_users,2),
ROUND(returned_users * 100.0 / total_users, 2) ) pct
FROM daily_summary
ORDER BY login_date;
WITH
user_daily_login AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
GROUP BY usr_id, DATE(login_time)
),
user_first_login AS (
SELECT
usr_id,
MIN(login_date) AS first_login_date
FROM user_daily_login
GROUP BY usr_id
),
user_login_with_history AS (
SELECT
udl.usr_id,
udl.login_date,
ufl.first_login_date
FROM user_daily_login udl
JOIN user_first_login ufl
ON udl.usr_id = ufl.usr_id
),
user_login_status AS (
SELECT
usr_id,
login_date,
LAG(login_date) OVER (PARTITION BY usr_id ORDER BY login_date) AS last_login_before
FROM user_login_with_history ulwh
),
daily_summary AS (
SELECT
login_date,
COUNT(*) AS total_users,
SUM(CASE WHEN last_login_before IS NULL THEN 1 ELSE 0 END) AS new_users,
SUM(CASE WHEN DATEDIFF(login_date, last_login_before) BETWEEN 1 AND 3 THEN 1 ELSE 0 END) AS retained_users,
SUM(CASE WHEN DATEDIFF(login_date, last_login_before) >= 4 THEN 1 ELSE 0 END) AS returned_users
FROM user_login_status
WHERE login_date BETWEEN '2024-01-01' AND '2024-12-31'
GROUP BY login_date
)
SELECT
login_date,
concat_ws(',',
ROUND(new_users * 100.0 / total_users, 2) ,
ROUND(retained_users * 100.0 / total_users, 2) ,
ROUND(returned_users * 100.0 / total_users, 2) ) pct
FROM daily_summary
ORDER BY login_date;
WITH
user_daily_login AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
GROUP BY usr_id, DATE(login_time)
),
user_first_login AS (
SELECT
usr_id,
MIN(login_date) AS first_login_date
FROM user_daily_login
GROUP BY usr_id
),
user_login_with_history AS (
SELECT
udl.usr_id,
udl.login_date,
ufl.first_login_date
FROM user_daily_login udl
JOIN user_first_login ufl
ON udl.usr_id = ufl.usr_id
),
user_login_status AS (
SELECT
usr_id,
login_date,
LAG(login_date) OVER (PARTITION BY usr_id ORDER BY login_date) AS last_login_before
FROM user_login_with_history ulwh
),
daily_summary AS (
SELECT
login_date,
COUNT(*) AS total_users,
SUM(CASE WHEN last_login_before IS NULL THEN 1 ELSE 0 END) AS new_users,
SUM(CASE WHEN DATEDIFF(login_date, last_login_before) BETWEEN 1 AND 3 THEN 1 ELSE 0 END) AS retained_users,
SUM(CASE WHEN DATEDIFF(login_date, last_login_before) >= 4 THEN 1 ELSE 0 END) AS returned_users
FROM user_login_status
WHERE login_date BETWEEN '2024-01-01' AND '2024-12-31'
GROUP BY login_date
)
SELECT
login_date,
concat_ws(',',
ROUND(new_users * 1.0 / total_users, 2) ,
ROUND(retained_users * 1.0 / total_users, 2) ,
ROUND(returned_users * 1.0 / total_users, 2) ) pct
FROM daily_summary
ORDER BY login_date;
WITH daily_logins AS (
SELECT
usr_id,
DATE(login_time) AS login_day,
MIN(login_time) AS login_time
FROM user_login_log
WHERE YEAR(login_time) = 2024
GROUP BY usr_id, DATE(login_time)
),
first_login AS (
SELECT
usr_id,
MIN(login_time) AS first_login_date
FROM user_login_log
GROUP BY usr_id
),
login_with_meta AS (
SELECT
dl.usr_id,
dl.login_day,
dl.login_time,
fl.first_login_date,
LAG(dl.login_time) OVER (PARTITION BY dl.usr_id ORDER BY dl.login_time) AS last_login_before
FROM daily_logins dl
LEFT JOIN first_login fl ON dl.usr_id = fl.usr_id
)
SELECT
login_day as login_date,
concat_ws(',',
ROUND(SUM(CASE WHEN last_login_before IS NULL THEN 1 ELSE 0 END) / COUNT(*) * 100, 2) ,
ROUND(SUM(CASE WHEN DATEDIFF(login_time, last_login_before) BETWEEN 1 AND 3 THEN 1 ELSE 0 END) / COUNT(*) * 100, 2),
ROUND(SUM(CASE WHEN DATEDIFF(login_time, last_login_before) >= 4 THEN 1 ELSE 0 END) / COUNT(*) * 100, 2) ) pct
FROM login_with_meta
GROUP BY login_day
ORDER BY login_day;
WITH
user_daily_login AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM user_login_log
GROUP BY usr_id, DATE(login_time)
),
user_first_login AS (
SELECT
usr_id,
MIN(login_date) AS first_login_date
FROM user_daily_login
GROUP BY usr_id
),
user_login_with_history AS (
SELECT
udl.usr_id,
udl.login_date,
ufl.first_login_date
FROM user_daily_login udl
JOIN user_first_login ufl
ON udl.usr_id = ufl.usr_id
),
user_login_status AS (
SELECT
usr_id,
login_date,
CASE
WHEN login_date = first_login_date THEN 'new'
WHEN EXISTS (
SELECT 1
FROM user_daily_login prev
WHERE prev.usr_id = ulwh.usr_id
AND prev.login_date BETWEEN ulwh.login_date - INTERVAL 3 DAY AND ulwh.login_date - INTERVAL 1 DAY
) THEN 'retained'
ELSE 'returned'
END AS status
FROM user_login_with_history ulwh
),
daily_summary AS (
SELECT
login_date,
COUNT(*) AS total_users,
COUNT(CASE WHEN status = 'new' THEN usr_id END) AS new_users,
COUNT(CASE WHEN status = 'retained' THEN usr_id END) AS retained_users,
COUNT(CASE WHEN status = 'returned' THEN usr_id END) AS returned_users
FROM user_login_status
WHERE login_date BETWEEN '2024-01-01' AND '2024-12-31'
GROUP BY login_date
)
SELECT
login_date,
concat_ws(', ',
ROUND(new_users * 100.0 / total_users, 2) ,
ROUND(retained_users * 100.0 / total_users, 2),
ROUND(returned_users * 100.0 / total_users, 2)) pct
FROM daily_summary
ORDER BY login_date;
with duiying as(select t100.m_flg,t100.usr_id
,v_id,v_tm
from bilibili_t100 t100
left join
bilibili_t20 t20
on t100.usr_id=t20.usr_id
where date(v_tm)between '2021-02-01' and '2021-02-28'
),
ccnt as(
select
m_flg
,usr_id
,date(v_tm)
,count(v_id) as cnt from duiying
group by 1,2,3
)
select m_flg,round(avg(cnt),2)
from ccnt
group by 1
WITH UserVideoTypes AS (
SELECT
t20.usr_id,
t3.v_typ
FROM
bilibili_t20 t20
JOIN
bilibili_t3 t3 ON t20.v_id = t3.v_id
),
UserViewCategories AS (
SELECT
usr_id,
COUNT(DISTINCT v_typ) AS view_category_count
FROM
UserVideoTypes
GROUP BY
usr_id
),
FilteredUsers AS (
SELECT
usr_id
FROM
UserViewCategories
WHERE
view_category_count >= 3
),
VideoCategoryCounts AS (
SELECT
v_typ,
COUNT(DISTINCT usr_id) AS total_viewers
FROM
UserVideoTypes
GROUP BY
v_typ
),
MultiCategoryViewers AS (
SELECT
v_typ,
COUNT(DISTINCT ut.usr_id) AS multi_category_viewers
FROM
UserVideoTypes ut
JOIN
FilteredUsers fu ON ut.usr_id = fu.usr_id
GROUP BY
v_typ
),
MCVI AS (
SELECT
vc.v_typ,
mc.multi_category_viewers,
vc.total_viewers,
(mc.multi_category_viewers * 100.0 / vc.total_viewers) AS mcv_index
FROM
VideoCategoryCounts vc
LEFT JOIN
MultiCategoryViewers mc ON vc.v_typ = mc.v_typ
)
SELECT
v_typ,
multi_category_viewers,
total_viewers,
mcv_index
FROM
MCVI
ORDER BY
mcv_index DESC;
with duiying as(select t20.usr_id,t3.v_typ
from
bilibili_t20 t20
join bilibili_t3 t3
on t20.v_id=t3.v_id
group by 1,2
)
,duoge as(
select usr_id
from(
select usr_id,count(distinct v_typ) cnt
from
duiying
group by
1)a
where cnt>=3)
,duogetyp as(
select y.v_typ,count(d.usr_id) dcnt
from duiying y
join duoge d
on y.usr_id=d.usr_id
group by 1)
,aaa as
(select
v_typ,count(usr_id) as cnt
from duiying group by 1
)
selecta.v_typ,dcnt as multi_category_viewers
,cnt total_viewers
,dcnt/cnt*100 mcv_index
from duogetyp p
left joinaaa a
on a.v_typ=p.v_typ
order by mcv_index desc
with duiying as(select t20.usr_id,t3.v_typ
from
bilibili_t20 t20
join bilibili_t3 t3
on t20.v_id=t3.v_id
group by 1,2
)
,duoge as(
select usr_id
from(
select usr_id,count(distinct v_typ) cnt
from
duiying
group by
1)a
where cnt>=3)
,duogetyp as(
select y.v_typ,count(d.usr_id) dcnt
from duiying y
join duoge d
on y.usr_id=d.usr_id
group by 1)
,aaa as
(select
v_typ,count(usr_id) as cnt
from duiying group by 1
)
selecta.v_typ,dcnt as multi_category_viewers
,cnt total_viewers
,round(dcnt/cnt*100,2)mcv_index
from duogetyp p
left joinaaa a
on a.v_typ=p.v_typ
order by mcv_index desc
with duiying as(select t20.usr_id,t3.v_typ
from
bilibili_t20 t20
join bilibili_t3 t3
on t20.v_id=t3.v_id
group by 1,2
)
,duoge as(
select usr_id
from(
select usr_id,count(distinct v_typ) cnt
from
duiying
group by
1)a
where cnt>=3)
,duogetyp as(
select y.v_typ,count(d.usr_id) dcnt
from duiying y
join duoge d
on y.usr_id=d.usr_id
group by 1)
,aaa as
(select
v_typ,count(usr_id) as cnt
from duiying group by 1
)
selecta.v_typ,dcnt,cnt,round(dcnt/cnt*100,2)rate
from duogetyp p
left joinaaa a
on a.v_typ=p.v_typ
order by rate desc
with duiying as(select t20.usr_id,t3.v_typ
from
bilibili_t20 t20
join bilibili_t3 t3
on t20.v_id=t3.v_id
)
,users as(
select usr_id
from(
select
usr_id
from(
select usr_id,group_concat(distinct v_typ
order by v_typ) tag
from duiying
group by 1
)a
where tag in('汽车','IT','IT,汽车')) b
)
select
t20.usr_id,
t20.v_id,
t20.v_tm
from
bilibili_t20 t20
join users u
on u.usr_id=t20.usr_id
order by v_tm
with duiying as(select t20.usr_id,t3.v_typ
from
bilibili_t20 t20
left join bilibili_t3 t3
on t20.v_id=t3.v_id
group by 1,2
)
select
count(a.usr_id)
from(
select usr_id,count(v_typ) cnt
from duiying
group by 1
)a
left join
duiying d
on a.usr_id=d.usr_id
where cnt=1 and d.v_typ='放映厅'
with minlogin as(
select usr_id,min(v_date) mindate
from bilibili_t100
group by 1
),
newlogin as(
select mindate,count(usr_id) cnt
from minlogin
group by 1
order by 1
)
,minflg as(
select
mindate,count(t.usr_id) fcnt
from
minlogin m
left join bilibili_t100 t
onm.usr_id=t.usr_id
wherem_flg=1 andm.mindate=t.v_date
group by 1
)
select
g.mindate,g.cnt as new_users,
f.fcnt as new_members,
round(100*f.fcnt/g.cnt ,2)
from newlogin g
left join
minflg f
on g.mindate=f.mindate
order by 1