with t1 as(
select
cust_uid,
count(*) as transaction_count,
count(distinct trx_dt) as active_days_count
from
mt_trx_rcd_f
group by
cust_uid
),
t2 as(
select
cust_uid,
transaction_count,
CAST(rank() over(order by transaction_count desc) as signed) as transaction_rank,
active_days_count,
CAST(rank() over(order by active_days_count desc) as signed) as active_days_rank
from
t1)
select
cust_uid,
transaction_count,
transaction_rank,
active_days_count,
active_days_rank,
abs(transaction_rank - active_days_rank) as rank_difference
from
t2
order by
rank_difference DESC
SELECT r.*
FROM didi_sht_rcd r
JOIN loc_nm_ctg l_start ON r.start_loc = l_start.loc_nm
JOIN loc_nm_ctg l_end ON r.end_loc = l_end.loc_nm
WHERE l_start.loc_ctg = '餐饮'
AND l_end.loc_ctg = '餐饮'
ORDER BY r.start_tm ASC;
WITH ConvertedTime AS (
SELECT
order_id,
cust_uid,
DATE_ADD(call_time, INTERVAL -3 HOUR) AS local_call_time,
grab_time,
cancel_time,
finish_time
FROM
didi_order_rcd
),
CallDates AS (
SELECT
order_id,
cust_uid,
local_call_time,
DATE(local_call_time) AS call_date
FROM
ConvertedTime
),
NextDayCalls AS (
SELECT
c1.order_id AS order_id_1,
c1.call_date AS call_date_1,
c2.order_id AS order_id_2,
c2.call_date AS call_date_2
FROM
CallDates c1
JOIN
CallDates c2
ON
c2.call_date = DATE_ADD(c1.call_date, INTERVAL 1 DAY)
AND
c1.cust_uid = c2.cust_uid
AND
c1.order_id = c2.order_id
),
NextDayCallCount AS (
SELECT
COUNT(DISTINCT order_id_1) AS next_day_call_count
FROM
NextDayCalls
),
TotalOrderCount AS (
SELECT
COUNT(order_id) AS total_order_count
FROM
didi_order_rcd
where finish_time = '1970-01-01 00:00:00'
)
SELECT
ncc.next_day_call_count,
toc.total_order_count,
CONCAT(FORMAT((ncc.next_day_call_count * 1.0 / toc.total_order_count) * 100, 2), '%') AS next_day_call_ratio
FROM
NextDayCallCount ncc,
TotalOrderCount toc;
with t1 as(
select
order_id
,cust_uid
,date_add(call_time, interval -3 hour) as local_call_time
,grab_time
,cancel_time
,finish_time
from
didi_order_rcd
)
select
hour(local_call_time) as local_hour
,count(1) as cnt
from
t1
group by
1
order by
2
SELECT
sum(TIMESTAMPDIFF(SECOND, call_time, grab_time))/count(1) AS avg_response_time_seconds
FROM
didi_order_rcd
WHERE
grab_time != '1970-01-01 00:00:00';
SELECT
COUNT(order_id) AS total_orders,
SUM(CASE WHEN grab_time != '1970-01-01 00:00:00' THEN 1 ELSE 0 END) AS answered_orders,
CONCAT(FORMAT((SUM(CASE WHEN grab_time != '1970-01-01 00:00:00' THEN 1 ELSE 0 END) * 1.0 / COUNT(order_id)) * 100, 2), '%') AS answer_rate
FROM
didi_order_rcd
WHERE
DATE(call_time) = '2021-05-03';
SELECT
substr(purchase_time, 1,7) AS mon,
CASE
WHEN product_type = 'iPhone' THEN 'iPhone'
ELSE 'Not iPhone'
END AS category,
COUNT(DISTINCT user_id) AS user_count
FROM apple_pchs_rcd
GROUP BY mon, category
ORDER BY mon, category;
WITH ranked_purchases AS (
SELECT
user_id,
product_type,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY purchase_time ASC) AS rn
FROM apple_pchs_rcd
),
first_and_second_purchase AS (
SELECT
user_id,
MAX(CASE WHEN rn = 1 THEN product_type END) AS first_product,
MAX(CASE WHEN rn = 2 THEN product_type END) AS second_product
FROM ranked_purchases
GROUP BY user_id
)
SELECT
user_id,
CASE
WHEN first_product = 'Mac' AND second_product = 'iPhone' THEN 1
ELSE 0
END AS tag
FROM first_and_second_purchase
ORDER BY user_id;
SELECT
user_id,
CASE
WHEN product_type = 'Mac' THEN 1
ELSE 0
END AS tag
FROM (
SELECT
user_id,
product_type,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY purchase_time ASC) AS rn
FROM apple_pchs_rcd
) AS subquery
WHERE rn = 1
ORDER BY user_id;
SELECT
YEARWEEK(purchase_time, 1) AS week_number,
CASE
WHEN product_type = 'iPhone' THEN 'iPhone'
ELSE 'Not iPhone'
END AS category,
COUNT(DISTINCT user_id) AS user_count
FROM apple_pchs_rcd
GROUP BY week_number, category
ORDER BY week_number, category;
WITH user_login_days AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY
usr_id, DATE(login_time)
),
ranked_logins AS (
SELECT
usr_id,
login_date,
ROW_NUMBER() OVER (PARTITION BY usr_id ORDER BY login_date) AS row_num
FROM
user_login_days
),
grouped_logins AS (
SELECT
usr_id,
login_date,
login_date - INTERVAL row_num DAY AS grp
FROM
ranked_logins
),
consecutive_logins AS (
SELECT
usr_id,
MIN(login_date) AS start_date,
MAX(login_date) AS end_date,
COUNT(*) AS consecutive_days
FROM
grouped_logins
GROUP BY
usr_id, grp
HAVING
COUNT(*) > 2
)
SELECT
usr_id,
start_date,
end_date,
consecutive_days
FROM
consecutive_logins
ORDER BY
usr_id ASC,
start_date ;
WITH user_login_days AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 180 DAY)
),
distinct_login_days AS (
SELECT
usr_id,
COUNT(DISTINCT login_date) AS login_days
FROM
user_login_days
GROUP BY
usr_id
)
SELECT
SUM(CASE WHEN login_days BETWEEN 1 AND 5 THEN 1 ELSE 0 END) AS days_1_to_5,
SUM(CASE WHEN login_days BETWEEN 6 AND 10 THEN 1 ELSE 0 END) AS days_6_to_10,
SUM(CASE WHEN login_days BETWEEN 11 AND 20 THEN 1 ELSE 0 END) AS days_11_to_20,
SUM(CASE WHEN login_days > 20 THEN 1 ELSE 0 END) AS days_over_20
FROM
distinct_login_days;
SELECT
COUNT(DISTINCT CASE
WHEN TIME(login_time) BETWEEN '07:30:00' AND '09:30:00'
OR TIME(login_time) BETWEEN '18:30:00' AND '20:30:00' THEN usr_id
END) AS commute,
COUNT(DISTINCT CASE
WHEN TIME(login_time) BETWEEN '11:30:00' AND '14:00:00' THEN usr_id
END) AS lunch_break,
COUNT(DISTINCT CASE
WHEN TIME(login_time) BETWEEN '22:30:00' AND '23:59:59' THEN usr_id
WHEN TIME(login_time) BETWEEN '00:00:00' AND '01:00:00' THEN usr_id
END) AS bedtime
FROM
user_login_log
WHERE
login_time >= DATE_FORMAT(DATE_SUB(CURDATE(), INTERVAL 1 MONTH), '%Y-%m-01')
AND login_time < DATE_FORMAT(CURDATE(), '%Y-%m-01');
WITH monthly_unique_logins AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_month
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-01-01'
GROUP BY
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01')
),
new_users AS (
SELECT
usr_id,
MIN(login_month) AS first_login_month
FROM
monthly_unique_logins
GROUP BY
usr_id
),
next_month_logins AS (
SELECT
nu.usr_id,
nu.first_login_month AS current_month,
mul.login_month AS next_month
FROM
new_users nu
LEFT JOIN
monthly_unique_logins mul
ON
nu.usr_id = mul.usr_id AND
mul.login_month = DATE_ADD(nu.first_login_month, INTERVAL 1 MONTH)
)
SELECT
current_month,
ROUND(COUNT(DISTINCT CASE WHEN next_month IS NOT NULL THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_1_month_retention_rate
FROM
next_month_logins
WHERE
current_month >= '2024-01-01' AND current_month < '2025-01-01'
GROUP BY
current_month
ORDER BY
current_month;
WITH monthly_unique_logins AS (
SELECT
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01') AS login_month
FROM
user_login_log
WHERE
login_time >= '2024-01-01' AND login_time < '2025-01-01'
GROUP BY
usr_id,
DATE_FORMAT(login_time, '%Y-%m-01')
),
next_month_logins AS (
SELECT
mul1.usr_id,
mul1.login_month AS current_month,
mul2.login_month AS next_month
FROM
monthly_unique_logins mul1
LEFT JOIN
monthly_unique_logins mul2
ON
mul1.usr_id = mul2.usr_id AND
mul2.login_month = DATE_ADD(mul1.login_month, INTERVAL 1 MONTH)
)
SELECT
current_month,
ROUND(COUNT(DISTINCT CASE WHEN next_month IS NOT NULL THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_1_month_retention_rate
FROM
next_month_logins
WHERE
current_month >= '2024-01-01' AND current_month < '2024-12-01'
GROUP BY
current_month
ORDER BY
current_month;
WITH daily_unique_logins AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 90 DAY)
GROUP BY
usr_id,
DATE(login_time)
),
retention_days AS (
SELECT
dul1.usr_id,
dul1.login_date AS first_login_date,
dul2.login_date AS next_day_login_date,
DATEDIFF(dul2.login_date, dul1.login_date) AS days_diff
FROM
daily_unique_logins dul1
LEFT JOIN
daily_unique_logins dul2
ON
dul1.usr_id = dul2.usr_id AND
dul2.login_date BETWEEN dul1.login_date + INTERVAL 1 DAY AND dul1.login_date + INTERVAL 14 DAY
)
SELECT
first_login_date,
ROUND(COUNT(DISTINCT CASE WHEN days_diff BETWEEN 1 AND 3 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_3_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff BETWEEN 1 AND 7 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_7_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff BETWEEN 1 AND 14 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_14_retention_rate
FROM
retention_days
WHERE
first_login_date >= DATE_SUB(CURDATE(), INTERVAL 90 DAY)
GROUP BY
first_login_date
ORDER BY
first_login_date;
WITH daily_unique_logins AS (
SELECT
usr_id,
DATE(login_time) AS login_date
FROM
user_login_log
WHERE
login_time >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY
usr_id,
DATE(login_time)
),
retention_days AS (
SELECT
dul1.usr_id,
dul1.login_date AS first_login_date,
dul2.login_date AS next_day_login_date,
DATEDIFF(dul2.login_date, dul1.login_date) AS days_diff
FROM
daily_unique_logins dul1
LEFT JOIN
daily_unique_logins dul2
ON
dul1.usr_id = dul2.usr_id AND
dul2.login_date BETWEEN dul1.login_date + INTERVAL 1 DAY AND dul1.login_date + INTERVAL 14 DAY
)
SELECT
first_login_date,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 1 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_1_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 3 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_3_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 7 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_7_retention_rate,
ROUND(COUNT(DISTINCT CASE WHEN days_diff = 14 THEN usr_id END) * 100.0 / COUNT(DISTINCT usr_id), 2) AS t_plus_14_retention_rate
FROM
retention_days
WHERE
first_login_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
GROUP BY
first_login_date
ORDER BY
first_login_date;
with data1 as (
select distinct
usr_id,
date(login_time) as login_date
from
user_login_log
where
datediff(current_date, date(login_time)) <= 30
),
data2 as (
select
T.usr_id,
T.login_date as T_date,
T_1.login_date as T_1_date
from
data1 as T
left join
data1 as T_1
on
T.usr_id = T_1.usr_id
and datediff(T.login_date, T_1.login_date) = -1
)
select
T_date as first_login_date,
concat(round(avg(T_1_date is not null)*100, 2), '%') as T1_retention_rate
from
data2
group by
T_date
order by
T_date;