We want to find the longest streak of consecutive passes between the same team members. This is a classic problem of finding the longest consecutive segment with the same value, which can be solved using window functions in SQL or groupby with diff in Pandas.
SELECT passer, receiver, MAX(streak) AS max_streak
FROM (
SELECT*,
ROW_NUMBER() OVER (ORDERBY time) - ROW_NUMBER() OVER (PARTITION BY passer, receiver ORDERBY time) AS grp,
COUNT(*) OVER (PARTITION BY passer, receiver,
ROW_NUMBER() OVER (ORDERBY time) - ROW_NUMBER() OVER (PARTITION BY passer, receiver ORDERBY time)) AS streak
FROM Passes
) t
GROUPBY passer, receiver;
1
2
3
4
5
6
7
8
9
10
SELECT passer, receiver, MAX(streak) AS max_streak
FROM (
SELECT*,
ROW_NUMBER() OVER (ORDERBY time) - ROW_NUMBER() OVER (PARTITION BY passer, receiver ORDERBY time) AS grp,
COUNT(*) OVER (PARTITION BY passer, receiver,
ROW_NUMBER() OVER (ORDERBY time) - ROW_NUMBER() OVER (PARTITION BY passer, receiver ORDERBY time)) AS streak
FROM Passes
) t
GROUPBY passer, receiver;
1
2
3
4
5
6
7
8
9
10
import pandas as pd
deflongest_team_pass_streak(df: pd.DataFrame) -> pd.DataFrame:
df = df.sort_values('time')
grp = (df[['passer', 'receiver']] != df[['passer', 'receiver']].shift()).any(axis=1).cumsum()
df['grp'] = grp
streaks = df.groupby(['passer', 'receiver', 'grp']).size().reset_index(name='streak')
result = streaks.groupby(['passer', 'receiver'])['streak'].max().reset_index()
result = result.rename(columns={'streak': 'max_streak'})
return result