🎯 https://t.me/addlist/74yhTlKg8_00YWYy
Здесь вас ждут ответы на многие вопросы и множество интересного контента. 💪
Последние публикации канала «Мир аналитика данных» в Telegram
import pandas as pd
# Sample fixtures for the "confirmation rate" exercise: one table of signups
# and one table of confirmation attempts. Timestamps are kept as plain
# strings; nothing here parses them into datetimes.

# One (user_id, time_stamp) record per signed-up user.
signup_rows = [
    (3, '2020-03-21 10:16:13'),
    (7, '2020-01-04 13:57:59'),
    (2, '2020-07-29 23:09:44'),
    (6, '2020-12-09 10:39:37'),
]
signups_data = {
    'user_id': [user_id for user_id, _ in signup_rows],
    'time_stamp': [stamp for _, stamp in signup_rows],
}
signups = pd.DataFrame(signups_data)

# One (user_id, time_stamp, action) record per confirmation attempt.
# User 6 signed up but has no attempts at all.
confirmation_rows = [
    (3, '2021-01-06 03:30:46', 'timeout'),
    (3, '2021-07-14 14:00:00', 'timeout'),
    (7, '2021-06-12 11:57:29', 'confirmed'),
    (7, '2021-06-13 12:58:28', 'confirmed'),
    (7, '2021-06-14 13:59:27', 'confirmed'),
    (2, '2021-01-22 00:00:00', 'confirmed'),
    (2, '2021-02-28 23:59:59', 'timeout'),
]
confirmations_data = {
    'user_id': [row[0] for row in confirmation_rows],
    'time_stamp': [row[1] for row in confirmation_rows],
    'action': [row[2] for row in confirmation_rows],
}
confirmations = pd.DataFrame(confirmations_data)
# Финальный запрос для LeetCode (PostgreSQL) выглядит так:
-- Confirmation rate per signed-up user: confirmed attempts divided by all
-- attempts, rounded to two decimals.
WITH per_user AS (
    SELECT
        s.user_id,
        COUNT(*) FILTER (WHERE c.action = 'confirmed') AS confirmed_cnt,
        -- COUNT(*) also counts the single NULL-extended row that the LEFT JOIN
        -- produces for a user with no confirmations, so the divisor is never 0
        -- and such a user gets a rate of 0.
        COUNT(*) AS total_cnt
    FROM signups AS s
    LEFT JOIN confirmations AS c
        ON c.user_id = s.user_id
    GROUP BY s.user_id
)
SELECT
    user_id,
    -- Cast before dividing so the division is not integer division.
    ROUND(CAST(confirmed_cnt AS NUMERIC) / total_cnt, 2) AS confirmation_rate
FROM per_user;
import pandas as pd
from pandasql import sqldf
# Employee fixture for the "managers with at least five direct reports"
# exercise, written one record per employee:
# (id, name, department, managerId). John (101) has no manager.
employee_rows = [
    (101, "John", "A", None),
    (102, "Dan", "A", 101),
    (103, "James", "A", 101),
    (104, "Amy", "A", 101),
    (105, "Anne", "A", 101),
    (106, "Ron", "B", 101),
]
ids, names, departments, manager_ids = (
    list(column) for column in zip(*employee_rows)
)
data = {
    "id": ids,
    "name": names,
    "department": departments,
    "managerId": manager_ids,
}
employee = pd.DataFrame(data)

# Select the names of employees whose id occurs as managerId on five or more
# rows of the same table.
query = """
SELECT name
FROM employee
WHERE id in (
SELECT managerId
FROM employee
GROUP BY managerId
HAVING COUNT(*) >= 5
)
"""
# NOTE(review): sqldf is called without an explicit environment; presumably it
# resolves the `employee` DataFrame from the calling scope — confirm against
# the pandasql documentation. The result is not stored; it is only shown when
# this is the last expression of a notebook cell.
sqldf(query)
import pandas as pd
# Subscription periods, one record per (client, tariff). Within each client
# the records are listed in chronological order; the shift-based gap
# computation below compares every row with the row listed before it.
columns = ('client_id', 'start', 'finish', 'title', 'amount')
rows = [
    # Client 1: no breaks (three subscriptions back to back)
    (1, '2023-01-01 09:15:00', '2023-02-01 10:55:00', 'тариф1', 1000),
    (1, '2023-02-01 10:55:00', '2023-03-01 12:30:00', 'тариф2', 1500),
    (1, '2023-03-01 12:30:00', '2023-04-01 09:00:00', 'тариф3', 2000),
    # Client 2: a break longer than 30 days between the second and third tariff
    (2, '2023-01-07 11:00:00', '2023-02-07 10:45:00', 'тариф1', 1200),
    (2, '2023-02-07 10:45:00', '2023-03-07 11:15:00', 'тариф2', 1400),
    (2, '2023-04-15 09:20:00', '2023-05-15 10:00:00', 'тариф3', 2100),
    # Client 3: also no breaks (three tariffs in a row)
    (3, '2023-01-12 08:50:00', '2023-02-12 10:05:00', 'тариф1', 1000),
    (3, '2023-02-12 10:05:00', '2023-03-12 09:55:00', 'тариф2', 1200),
    (3, '2023-03-12 09:55:00', '2023-04-12 10:10:00', 'тариф3', 1400),
]
data = [dict(zip(columns, row)) for row in rows]

df = pd.DataFrame(data)
for column in ('start', 'finish'):
    df[column] = pd.to_datetime(df[column])

# Finish time of the same client's previous row (missing for a client's
# first row).
previous_finish = df.groupby('client_id')['finish'].shift(1)
df['prev_finish'] = previous_finish

# Whole days between the previous finish and this start; `.dt.days` drops the
# sub-day remainder, so "> 30" below means at least 31 full days.
df['gap'] = (df['start'] - df['prev_finish']).dt.days

# Flag rows that start after a long break; all other rows keep a missing
# value in 'check'.
df.loc[df['gap'] > 30, 'check'] = 1
is_flagged = df['check'] == 1

# Clients that have at least one flagged row.
accounts_with_gap = tuple(set(df.loc[is_flagged, 'client_id']))

# Rows that are not flagged themselves ...
df_winner = df[~is_flagged]
# ... and, of those, only the rows of clients without any flagged row.
has_clean_history = ~df_winner['client_id'].isin(accounts_with_gap)
df_winner_only = df_winner[has_clean_history]