with CommuteRecords AS (
SELECT
user_id,
DATE_FORMAT(start_time, '%Y-%m-01') AS month_start,
COUNT(DISTINCT DATE(start_time)) AS commute_days
FROM hello_bike_riding_rcd rcd
where start_loc in ('中关村电子城','凯德广场','北京机床研究所','天启大厦','恒通商务园','瀚海国际大厦','西门子大厦') and end_loc in ('望京', '望京南', '阜通')
or end_loc in ('中关村电子城','凯德广场','北京机床研究所','天启大厦','恒通商务园','瀚海国际大厦','西门子大厦') and start_loc in ('望京', '望京南', '阜通')
GROUP BY user_id, DATE_FORMAT(start_time, '%Y-%m-01')
),
ConsecutiveMonths AS (
SELECT
user_id,
month_start,
LEAD(month_start, 1) OVER (PARTITION BY user_id ORDER BY month_start) AS next_month_start,
LEAD(month_start, 2) OVER (PARTITION BY user_id ORDER BY month_start) AS next_next_month_start
FROM CommuteRecords
WHERE commute_days >= 5
),
ActiveUsers AS (
SELECT user_id
,case when (TIMESTAMPDIFF(MONTH, month_start, next_month_start) = 1 AND
TIMESTAMPDIFF(MONTH, next_month_start, next_next_month_start) = 1) then 1 else 0 end as active_tag
FROM ConsecutiveMonths
)
SELECT
user_id,max(active_tag) as active_tag
FROM ActiveUsers
group by user_id
with CommuteRecords AS (
SELECT
user_id,
DATE_FORMAT(start_time, '%Y-%m-01') AS month_start,
COUNT(DISTINCT DATE(start_time)) AS commute_days
FROM hello_bike_riding_rcd rcd
where start_loc in ('中关村电子城','凯德广场','北京机床研究所','天启大厦','恒通商务园','瀚海国际大厦','西门子大厦') and end_loc in ('望京', '望京南', '阜通')
GROUP BY user_id, DATE_FORMAT(start_time, '%Y-%m-01')
),
ConsecutiveMonths AS (
SELECT
user_id,
month_start,
LEAD(month_start, 1) OVER (PARTITION BY user_id ORDER BY month_start) AS next_month_start,
LEAD(month_start, 2) OVER (PARTITION BY user_id ORDER BY month_start) AS next_next_month_start
FROM CommuteRecords
WHERE commute_days >= 5
),
ActiveUsers AS (
SELECT user_id
,case when (TIMESTAMPDIFF(MONTH, month_start, next_month_start) = 1 AND
TIMESTAMPDIFF(MONTH, next_month_start, next_next_month_start) = 1) then 1 else 0 end as active_tag
FROM ConsecutiveMonths
)
SELECT
user_id,max(active_tag) as active_tag
FROM ActiveUsers
group by user_id
with cleared_gd_loc_map as
(select loc_nm, case when loc_nm='北京机床研究所' then '写字楼' when loc_nm='将台西' then '地铁站' else loc_type end as loc_type
from gd_loc_map)
,CommuteRecords AS (
SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS month,
COUNT(DISTINCT DATE(r.start_time)) AS commute_days
FROM
hello_bike_riding_rcd r
INNER JOIN
cleared_gd_loc_map s ON r.start_loc = s.loc_nm AND s.loc_type = '地铁站'
INNER JOIN
cleared_gd_loc_map e ON r.end_loc = e.loc_nm AND e.loc_type = '写字楼'
WHERE
date(r.start_time) BETWEEN '2020-01-01' AND '2024-12-31'
GROUP BY
r.user_id, DATE_FORMAT(r.start_time, '%Y-%m')
),
ActiveUsers AS (
SELECT
user_id,
month,
SUM(CASE WHEN commute_days >= 5 THEN 1 ELSE 0 END) OVER (PARTITION BY user_id ORDER BY month) AS active_months
FROM
CommuteRecords
)
SELECT DISTINCT
user_id,
CASE WHEN MAX(active_months) OVER (PARTITION BY user_id) >= 3 THEN 1 ELSE 0 END AS active_tag
FROM
ActiveUsers;
with cleared_gd_loc_map as
(select loc_nm, case when loc_nm='北京机床研究所' then '写字楼' when loc_nm='将台西' then '地铁站' else loc_type end as loc_type
from gd_loc_map)
,CommuteRecords AS (
SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS month,
COUNT(DISTINCT DATE(r.start_time)) AS commute_days
FROM
hello_bike_riding_rcd r
INNER JOIN
cleared_gd_loc_map s ON r.start_loc = s.loc_nm AND s.loc_type = '地铁站'
INNER JOIN
cleared_gd_loc_map e ON r.end_loc = e.loc_nm AND e.loc_type = '写字楼'
WHERE
date(r.start_time) BETWEEN '2020-01-01' AND '2024-12-31'
GROUP BY
r.user_id, DATE_FORMAT(r.start_time, '%Y-%m')
),
ActiveUsers AS (
SELECT
user_id,
month,
SUM(CASE WHEN commute_days >= 5 THEN 1 ELSE 0 END) OVER (PARTITION BY user_id ORDER BY month ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS active_months
FROM
CommuteRecords
)
SELECT DISTINCT
user_id,
CASE WHEN MAX(active_months) OVER (PARTITION BY user_id) >= 3 THEN 1 ELSE 0 END AS active_tag
FROM
ActiveUsers;
WITH CommuteRecords AS (
SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS month,
COUNT(DISTINCT DATE(r.start_time)) AS commute_days
FROM
hello_bike_riding_rcd r
INNER JOIN
gd_loc_map s ON r.start_loc = s.loc_nm AND s.loc_type = '地铁站'
INNER JOIN
gd_loc_map e ON r.end_loc = e.loc_nm AND e.loc_type = '写字楼'
WHERE
date(r.start_time) BETWEEN '2020-01-01' AND '2024-12-31'
GROUP BY
r.user_id, DATE_FORMAT(r.start_time, '%Y-%m')
),
ActiveUsers AS (
SELECT
user_id,
month,
SUM(CASE WHEN commute_days >= 5 THEN 1 ELSE 0 END) OVER (PARTITION BY user_id ORDER BY month ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS active_months
FROM
CommuteRecords
)
SELECT DISTINCT
user_id,
CASE WHEN MAX(active_months) OVER (PARTITION BY user_id) >= 3 THEN 1 ELSE 0 END AS active_tag
FROM
ActiveUsers;
WITH CommuteRecords AS (
SELECT
r.user_id,
DATE_FORMAT(r.start_time, '%Y-%m') AS month,
COUNT(DISTINCT DATE(r.start_time)) AS commute_days
FROM
hello_bike_riding_rcd r
INNER JOIN
gd_loc_map s ON r.start_loc = s.loc_nm AND s.loc_type = '地铁站'
INNER JOIN
gd_loc_map e ON r.end_loc = e.loc_nm AND e.loc_type = '写字楼'
WHERE
r.start_time BETWEEN '2020-01-01' AND '2024-12-31'
GROUP BY
r.user_id, DATE_FORMAT(r.start_time, '%Y-%m')
),
ActiveUsers AS (
SELECT
user_id,
month,
SUM(CASE WHEN commute_days >= 5 THEN 1 ELSE 0 END) OVER (PARTITION BY user_id ORDER BY month ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS active_months
FROM
CommuteRecords
)
SELECT DISTINCT
user_id,
CASE WHEN MAX(active_months) OVER (PARTITION BY user_id) >= 3 THEN 1 ELSE 0 END AS active_tag
FROM
ActiveUsers;
with cleared_gd_loc_map as
(select * from
gd_loc_map
where loc_nm not in ('北京机床研究所','将台西')
union
select '北京机床研究所' ,'写字楼')
,CommuteRecords AS (
SELECT
user_id,
DATE_FORMAT(start_time, '%Y-%m-01') AS month_start,
COUNT(DISTINCT DATE(start_time)) AS commute_days
FROM hello_bike_riding_rcd rcd
JOIN cleared_gd_loc_map start_loc ON rcd.start_loc = start_loc.loc_nm
JOIN cleared_gd_loc_map end_loc ON rcd.end_loc = end_loc.loc_nm
WHERE start_loc.loc_type = '地铁站' AND end_loc.loc_type = '写字楼'
or end_loc.loc_type = '地铁站' AND start_loc.loc_type = '写字楼'
GROUP BY user_id, DATE_FORMAT(start_time, '%Y-%m-01')
),
ConsecutiveMonths AS (
SELECT
user_id,
month_start,
LEAD(month_start, 1) OVER (PARTITION BY user_id ORDER BY month_start) AS next_month_start,
LEAD(month_start, 2) OVER (PARTITION BY user_id ORDER BY month_start) AS next_next_month_start
FROM CommuteRecords
WHERE commute_days >= 5
),
ActiveUsers AS (
SELECT user_id
,case when (TIMESTAMPDIFF(MONTH, month_start, next_month_start) = 1 AND
TIMESTAMPDIFF(MONTH, next_month_start, next_next_month_start) = 1) then 1 else 0 end as active_tag
FROM ConsecutiveMonths
)
SELECT
user_id,max(active_tag) as active_tag
FROM ActiveUsers
group by user_id