Skip to content

Commit

Permalink
Merge pull request #4 from PySport/add-some-helpers
Browse files Browse the repository at this point in the history
Add some helpers
  • Loading branch information
koenvo authored May 12, 2020
2 parents 6f6a1bb + 1442a75 commit 1e32fdb
Show file tree
Hide file tree
Showing 13 changed files with 316 additions and 92 deletions.
30 changes: 25 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,21 @@ package index](https://pypi.org/project/kloppy).
pip install kloppy
```

## Quickstart
We added some helpers to get started really quickly. The helpers allow easy loading and transforming of tracking data, and converting it to a pandas data frame.
```python
from kloppy import load_metrica_tracking_data, load_tracab_tracking_data, to_pandas, transform

# metrica data
data_set = load_metrica_tracking_data('home_file.csv', 'away_file.csv')
# or tracab
data_set = load_tracab_tracking_data('meta.xml', 'raw_data.txt')

data_set = transform(data_set, to_pitch_dimensions=[[0, 108], [-34, 34]])
pandas_data_frame = to_pandas(data_set)

```

### <a name="models"></a>Standardized models
Most providers use different names for the same thing. This module tries to model the real world as much as possible.
Understandable models are important and in some cases this means performance is subordinate to models that are easy to
Expand Down Expand Up @@ -152,19 +167,24 @@ Event data (de)serializers
- [ ] MetricaSports

Transformers
- [ ] Automated tests
- [X] Transform pitch dimensions
- [X] Transform orientation of points
- [x] Automated tests
- [x] Transform pitch dimensions
- [x] Transform orientation of points

Filters
- [ ] Automated tests
- [ ] Smoothing filters for tracking dataset

Helpers
- [x] Load tracking data
- [x] Transform pitch dimensions and orientation
- [x] Export to pandas dataframe

Importers
- [ ] Automated tests
- [ ] Pandas dataframe

Exporters
- [ ] Automated tests
- [ ] Pandas dataframe
- [x] Automated tests
- [x] Pandas dataframe
- [ ] SPADL json
3 changes: 2 additions & 1 deletion kloppy/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .infra.serializers import *
from .infra.serializers import *
from .helpers import *
8 changes: 6 additions & 2 deletions kloppy/domain/models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,17 @@ def get_orientation_factor(self,
elif self == Orientation.HOME_TEAM:
if attacking_direction == AttackingDirection.HOME_AWAY:
return -1
else:
elif attacking_direction == AttackingDirection.AWAY_HOME:
return 1
else:
raise Exception("AttackingDirection not set")
elif self == Orientation.AWAY_TEAM:
if attacking_direction == AttackingDirection.AWAY_HOME:
return -1
else:
elif attacking_direction == AttackingDirection.HOME_AWAY:
return 1
else:
raise Exception("AttackingDirection not set")
elif self == Orientation.BALL_OWNING_TEAM:
if ((ball_owning_team == Team.HOME
and attacking_direction == AttackingDirection.HOME_AWAY)
Expand Down
8 changes: 6 additions & 2 deletions kloppy/domain/services/transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import TypeVar

from kloppy.domain import (
Point,
PitchDimensions,
Expand Down Expand Up @@ -76,11 +78,13 @@ def transform_frame(self, frame: Frame) -> Frame:
}
)

DataSetType = TypeVar('DataSetType')

@classmethod
def transform_data_set(cls,
data_set: DataSet,
data_set: DataSetType,
to_pitch_dimensions: PitchDimensions = None,
to_orientation: Orientation = None) -> DataSet:
to_orientation: Orientation = None) -> DataSetType:
if not to_pitch_dimensions and not to_orientation:
return data_set
elif not to_orientation:
Expand Down
91 changes: 91 additions & 0 deletions kloppy/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from typing import Callable, TypeVar

from . import TRACABSerializer, MetricaTrackingSerializer
from .domain import DataSet, Frame, TrackingDataSet, Transformer, Orientation, PitchDimensions, Dimension


def load_tracab_tracking_data(meta_data_filename: str, raw_data_filename: str, options: dict = None) -> DataSet:
    """Load a TRACAB tracking data set from two files on disk.

    Both files are opened in binary mode and handed to
    ``TRACABSerializer.deserialize`` under the input keys ``'meta_data'``
    and ``'raw_data'``. The files are closed again when this function
    returns.

    :param meta_data_filename: path of the meta data file.
    :param raw_data_filename: path of the raw tracking data file.
    :param options: passed through unchanged to the serializer.
    :return: whatever ``TRACABSerializer.deserialize`` returns.
    """
    tracab_serializer = TRACABSerializer()
    with open(meta_data_filename, "rb") as meta_data_fp, \
            open(raw_data_filename, "rb") as raw_data_fp:
        serializer_inputs = {
            'meta_data': meta_data_fp,
            'raw_data': raw_data_fp
        }
        return tracab_serializer.deserialize(
            inputs=serializer_inputs,
            options=options
        )


def load_metrica_tracking_data(raw_data_home_filename: str, raw_data_away_filename: str, options: dict = None) -> DataSet:
    """Load a Metrica tracking data set from a home and an away file.

    Both files are opened in binary mode and handed to
    ``MetricaTrackingSerializer.deserialize`` under the input keys
    ``'raw_data_home'`` and ``'raw_data_away'``. The files are closed again
    when this function returns.

    :param raw_data_home_filename: path of the home team raw data file.
    :param raw_data_away_filename: path of the away team raw data file.
    :param options: passed through unchanged to the serializer.
    :return: whatever ``MetricaTrackingSerializer.deserialize`` returns.
    """
    metrica_serializer = MetricaTrackingSerializer()
    with open(raw_data_home_filename, "rb") as home_fp, \
            open(raw_data_away_filename, "rb") as away_fp:
        serializer_inputs = {
            'raw_data_home': home_fp,
            'raw_data_away': away_fp
        }
        return metrica_serializer.deserialize(
            inputs=serializer_inputs,
            options=options
        )


# Generic placeholder so `transform` is annotated as returning the same
# data set type it was given.
DataSetType = TypeVar('DataSetType')


def transform(data_set: DataSetType, to_orientation=None, to_pitch_dimensions=None) -> DataSetType:
    """Transform a data set using plain Python values for the targets.

    Convenience wrapper around ``Transformer.transform_data_set`` that
    accepts shorthand arguments:

    * ``to_orientation`` may be a non-empty string, in which case it is
      looked up by name with ``Orientation[...]``.
    * ``to_pitch_dimensions`` may be a non-empty list or tuple of the form
      ``[x_args, y_args]``; each element is unpacked into a ``Dimension``
      and the pair is wrapped in a ``PitchDimensions``.

    Any other value (including ``None``) is forwarded untouched.

    :param data_set: the data set to transform.
    :param to_orientation: target orientation, or its name as a string.
    :param to_pitch_dimensions: target pitch dimensions, or a 2-item
        list/tuple describing the x and y dimension.
    :return: the result of ``Transformer.transform_data_set``.
    """
    if to_orientation and isinstance(to_orientation, str):
        to_orientation = Orientation[to_orientation]

    if to_pitch_dimensions and isinstance(to_pitch_dimensions, (list, tuple)):
        # Build x before y so a malformed first entry is reported first.
        x_dimension = Dimension(*to_pitch_dimensions[0])
        y_dimension = Dimension(*to_pitch_dimensions[1])
        to_pitch_dimensions = PitchDimensions(
            x_dim=x_dimension,
            y_dim=y_dimension
        )

    return Transformer.transform_data_set(
        data_set=data_set,
        to_orientation=to_orientation,
        to_pitch_dimensions=to_pitch_dimensions
    )


def _frame_to_pandas_row_converter(frame: Frame) -> dict:
    """Flatten a single frame into a dict usable as one data frame row.

    The row always contains ``period_id``, ``timestamp``, ``ball_state``,
    ``ball_owning_team``, ``ball_x`` and ``ball_y`` (the last two are
    ``None`` when the frame has no truthy ``ball_position``). For every
    entry in the home and away player position mappings two more keys are
    added: ``player_<home|away>_<jersey_no>_x`` and ``..._y``.

    :param frame: the frame to convert.
    :return: a flat dict; home player keys are inserted before away ones.
    """
    ball = frame.ball_position
    if ball:
        ball_x, ball_y = ball.x, ball.y
    else:
        ball_x = ball_y = None

    record = {
        'period_id': frame.period.id,
        'timestamp': frame.timestamp,
        'ball_state': frame.ball_state,
        'ball_owning_team': frame.ball_owning_team,
        'ball_x': ball_x,
        'ball_y': ball_y,
    }

    for jersey_no, point in frame.home_team_player_positions.items():
        record[f'player_home_{jersey_no}_x'] = point.x
        record[f'player_home_{jersey_no}_y'] = point.y

    for jersey_no, point in frame.away_team_player_positions.items():
        record[f'player_away_{jersey_no}_x'] = point.x
        record[f'player_away_{jersey_no}_y'] = point.y

    return record


def to_pandas(data_set: DataSet, _record_converter: Callable = None) -> 'DataFrame':
    """Convert the records of a data set into a pandas ``DataFrame``.

    pandas is imported lazily so it is only required when this helper is
    actually called.

    :param data_set: object exposing a ``records`` iterable.
    :param _record_converter: callable mapping one record to one row. When
        omitted, ``_frame_to_pandas_row_converter`` is used for
        ``TrackingDataSet`` instances.
    :return: ``pd.DataFrame.from_records`` applied to the converted records.
    :raises Exception: if pandas is not installed, or if no converter is
        given and ``data_set`` is not a ``TrackingDataSet``.
    """
    try:
        import pandas as pd
    except ImportError:
        raise Exception("Seems like you don't have pandas installed. Please"
                        " install it using: pip install pandas")

    converter = _record_converter
    if not converter:
        # Only tracking data sets have a built-in default converter.
        if not isinstance(data_set, TrackingDataSet):
            raise Exception("Unknown dataset type")
        converter = _frame_to_pandas_row_converter

    rows = (converter(record) for record in data_set.records)
    return pd.DataFrame.from_records(rows)
9 changes: 9 additions & 0 deletions kloppy/tests/files/metrica_away.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,
,,,25,,15,,16,,17,,18,,19,,20,,21,,22,,23,,24,,26,,27,,28,,,
Period,Frame,Time [s],Player25,,Player15,,Player16,,Player17,,Player18,,Player19,,Player20,,Player21,,Player22,,Player23,,Player24,,Player26,,Player27,,Player28,,Ball,
1,1,0.04,0.90509,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.45472,0.38709
1,2,0.08,0.90494,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.49645,0.40656
1,3,0.12,0.90434,0.47463,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.53716,0.42556
2,145004,5800.16,0.12564,0.55386,0.17792,0.56682,0.25757,0.60019,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14926,0.56204,0.10285,0.81944,NaN,NaN,0.29331,0.488,NaN,NaN,0.35561,0.55254,0.19805,0.452,0.21798,0.81079,NaN,NaN
2,145005,5800.2,0.12564,0.55386,0.1773,0.56621,0.25721,0.60089,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14857,0.56068,0.10231,0.81944,NaN,NaN,0.29272,0.48789,NaN,NaN,0.35532,0.55243,0.19766,0.45237,0.21798,0.81079,NaN,NaN
2,145006,5800.24,0.12564,0.55386,0.17693,0.56675,0.25659,0.60072,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14846,0.56017,0.10187,0.8198,NaN,NaN,0.29267,0.48903,NaN,NaN,0.35495,0.55364,0.19754,0.45364,0.21798,0.81079,NaN,NaN
9 changes: 9 additions & 0 deletions kloppy/tests/files/metrica_home.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,,
,,,11,,1,,2,,3,,4,,5,,6,,7,,8,,9,,10,,12,,13,,14,,,
Period,Frame,Time [s],Player11,,Player1,,Player2,,Player3,,Player4,,Player5,,Player6,,Player7,,Player8,,Player9,,Player10,,Player12,,Player13,,Player14,,Ball,
1,1,0.04,0.00082,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.45472,0.38709
1,2,0.08,0.00096,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.49645,0.40656
1,3,0.12,0.00114,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.53716,0.42556
2,145004,5800.16,0.90492,0.45355,NaN,NaN,0.34089,0.64569,0.31214,0.67501,0.11428,0.92765,0.25757,0.60019,NaN,NaN,0.37398,0.62446,0.17401,0.83396,0.1667,0.76677,NaN,NaN,0.30044,0.68311,0.33637,0.65366,0.34089,0.64569,NaN,NaN
2,145005,5800.2,0.90456,0.45356,NaN,NaN,0.34056,0.64552,0.31171,0.67468,0.11428,0.92765,0.25721,0.60089,NaN,NaN,0.37398,0.62446,0.17358,0.8343,0.16638,0.76665,NaN,NaN,0.30044,0.68311,0.33615,0.65317,0.34056,0.64552,NaN,NaN
2,145006,5800.24,0.90456,0.45356,NaN,NaN,0.33996,0.64544,0.31122,0.67532,0.11428,0.92765,0.25659,0.60072,NaN,NaN,0.37398,0.62446,0.17327,0.8346,0.1659,0.76555,NaN,NaN,0.30044,0.68311,0.33563,0.65166,0.33996,0.64544,NaN,NaN
15 changes: 15 additions & 0 deletions kloppy/tests/files/tracab_meta.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<TracabMetaData sVersion="1.0">
<match
iId="1337"
dtDate="2020-01-02 03:04:05"
iFrameRateFps="25"
fPitchXSizeMeters="100.00"
fPitchYSizeMeters="60.00"
fTrackingAreaXSizeMeters="105.00"
fTrackingAreaYSizeMeters="70.00">
<period iId="1" iStartFrame="100" iEndFrame="102"/>
<period iId="2" iStartFrame="200" iEndFrame="202"/>
<period iId="3" iStartFrame="0" iEndFrame="0"/>
<period iId="4" iStartFrame="0" iEndFrame="0"/>
</match>
</TracabMetaData>
8 changes: 8 additions & 0 deletions kloppy/tests/files/tracab_raw.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
100:0,1,19,8889,-666,0.55;1,2,19,-1234,-294,0.07;:-27,25,0,27.00,H,Alive;:
101:0,1,19,8889,-666,0.55;1,2,19,-1234,-294,0.07;:-27,25,0,27.00,A,Alive;:
102:0,1,19,8889,-666,0.55;1,2,19,-1234,-294,0.07;:-27,25,0,27.00,H,Dead;:

200:0,1,1337,-8889,-666,0.55;1,2,19,-1234,-294,0.07;:-27,25,0,27.00,H,Alive;:
201:0,1,1337,-8889,-666,0.55;1,2,19,-1234,-294,0.07;:-27,25,0,27.00,H,Alive;:
202:0,1,1337,-8889,-666,0.55;1,2,19,-1234,-294,0.07;:-27,25,0,27.00,H,Alive;:
203:0,1,1337,-8889,-666,0.55;1,2,19,-1234,-294,0.07;:-27,25,0,27.00,H,Alive;:
104 changes: 104 additions & 0 deletions kloppy/tests/test_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import os
from io import BytesIO

from pandas import DataFrame
from pandas.testing import assert_frame_equal

from kloppy import MetricaTrackingSerializer, to_pandas, load_metrica_tracking_data, load_tracab_tracking_data, \
TrackingDataSet, PitchDimensions, Dimension, Orientation, Frame, transform
from kloppy.domain import Period, DataSetFlag, Point, AttackingDirection
from kloppy.infra.utils import performance_logging


class TestHelpers:
    """Tests for the load / transform / to_pandas convenience helpers."""

    def test_load_metrica_tracking_data(self):
        """The Metrica fixture files load into 6 records spread over 2 periods."""
        base_dir = os.path.dirname(__file__)
        home_path = f'{base_dir}/files/metrica_home.csv'
        away_path = f'{base_dir}/files/metrica_away.csv'

        loaded = load_metrica_tracking_data(home_path, away_path)

        assert len(loaded.records) == 6
        assert len(loaded.periods) == 2

    def test_load_tracab_tracking_data(self):
        """The TRACAB fixture files load into 5 records spread over 2 periods."""
        base_dir = os.path.dirname(__file__)
        meta_path = f'{base_dir}/files/tracab_meta.xml'
        raw_path = f'{base_dir}/files/tracab_raw.dat'

        loaded = load_tracab_tracking_data(meta_path, raw_path)

        assert len(loaded.records) == 5  # only alive=True
        assert len(loaded.periods) == 2

    def _get_dataset(self):
        """Build a small in-memory tracking data set with two frames.

        Both frames belong to the first period; the first frame has no
        player positions, the second has one home and one away player.
        """
        period_one = Period(
            id=1,
            start_timestamp=0.0,
            end_timestamp=10.0,
            attacking_direction=AttackingDirection.HOME_AWAY
        )
        period_two = Period(
            id=2,
            start_timestamp=15.0,
            end_timestamp=25.0,
            attacking_direction=AttackingDirection.AWAY_HOME
        )
        periods = [period_one, period_two]

        frame_without_players = Frame(
            frame_id=1,
            timestamp=0.1,
            ball_owning_team=None,
            ball_state=None,
            period=period_one,
            away_team_player_positions={},
            home_team_player_positions={},
            ball_position=Point(x=100, y=-50)
        )
        frame_with_players = Frame(
            frame_id=2,
            timestamp=0.2,
            ball_owning_team=None,
            ball_state=None,
            period=period_one,
            away_team_player_positions={'1': Point(x=10, y=20)},
            home_team_player_positions={'1': Point(x=15, y=35)},
            ball_position=Point(x=0, y=50)
        )

        return TrackingDataSet(
            flags=~(DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE),
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 100),
                y_dim=Dimension(-50, 50)
            ),
            orientation=Orientation.HOME_TEAM,
            frame_rate=25,
            records=[frame_without_players, frame_with_players],
            periods=periods
        )

    def test_transform(self):
        """String orientation and list pitch dimensions are accepted by ``transform``."""
        source_data_set = self._get_dataset()

        # orientation change AND dimension scale
        result = transform(
            source_data_set,
            to_orientation="AWAY_TEAM",
            to_pitch_dimensions=[[0, 1], [0, 1]]
        )

        first_frame, second_frame = result.frames[0], result.frames[1]
        assert first_frame.ball_position == Point(x=0, y=1)
        assert second_frame.ball_position == Point(x=1, y=0)

    def test_to_pandas(self):
        """``to_pandas`` yields one row per frame with per-player columns."""
        source_data_set = self._get_dataset()

        actual = to_pandas(source_data_set)

        expected = DataFrame.from_dict({
            'period_id': {0: 1, 1: 1},
            'timestamp': {0: 0.1, 1: 0.2},
            'ball_state': {0: None, 1: None},
            'ball_owning_team': {0: None, 1: None},
            'ball_x': {0: 100, 1: 0},
            'ball_y': {0: -50, 1: 50},
            'player_home_1_x': {0: None, 1: 15.0},
            'player_home_1_y': {0: None, 1: 35.0},
            'player_away_1_x': {0: None, 1: 10.0},
            'player_away_1_y': {0: None, 1: 20.0}
        })

        assert_frame_equal(actual, expected)
36 changes: 10 additions & 26 deletions kloppy/tests/test_metrica.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,23 @@
from io import BytesIO
import os

from kloppy import MetricaTrackingSerializer # NOT YET: , MetricaEventSerializer
from kloppy.domain import Period, AttackingDirection, Orientation, Point


class TestMetricaTracking:
def test_correct_deserialization(self):
raw_data_home = BytesIO(b""",,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,Home,,,
,,,11,,1,,2,,3,,4,,5,,6,,7,,8,,9,,10,,12,,13,,14,,,
Period,Frame,Time [s],Player11,,Player1,,Player2,,Player3,,Player4,,Player5,,Player6,,Player7,,Player8,,Player9,,Player10,,Player12,,Player13,,Player14,,Ball,
1,1,0.04,0.00082,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.45472,0.38709
1,2,0.08,0.00096,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.49645,0.40656
1,3,0.12,0.00114,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,0.35529,0.32137,0.21262,0.41094,0.72589,0.41698,0.47843,0.39125,0.3255,0.45388,0.21174,0.52697,0.3798,0.55243,0.43269,NaN,NaN,NaN,NaN,NaN,NaN,0.53716,0.42556
2,145004,5800.16,0.90492,0.45355,NaN,NaN,0.34089,0.64569,0.31214,0.67501,0.11428,0.92765,0.25757,0.60019,NaN,NaN,0.37398,0.62446,0.17401,0.83396,0.1667,0.76677,NaN,NaN,0.30044,0.68311,0.33637,0.65366,0.34089,0.64569,NaN,NaN
2,145005,5800.2,0.90456,0.45356,NaN,NaN,0.34056,0.64552,0.31171,0.67468,0.11428,0.92765,0.25721,0.60089,NaN,NaN,0.37398,0.62446,0.17358,0.8343,0.16638,0.76665,NaN,NaN,0.30044,0.68311,0.33615,0.65317,0.34056,0.64552,NaN,NaN
2,145006,5800.24,0.90456,0.45356,NaN,NaN,0.33996,0.64544,0.31122,0.67532,0.11428,0.92765,0.25659,0.60072,NaN,NaN,0.37398,0.62446,0.17327,0.8346,0.1659,0.76555,NaN,NaN,0.30044,0.68311,0.33563,0.65166,0.33996,0.64544,NaN,NaN""")

raw_data_away = BytesIO(b""",,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,,Away,
,,,25,,15,,16,,17,,18,,19,,20,,21,,22,,23,,24,,26,,27,,28,,,
Period,Frame,Time [s],Player25,,Player15,,Player16,,Player17,,Player18,,Player19,,Player20,,Player21,,Player22,,Player23,,Player24,,Player26,,Player27,,Player28,,Ball,
1,1,0.04,0.90509,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.45472,0.38709
1,2,0.08,0.90494,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.49645,0.40656
1,3,0.12,0.90434,0.47463,0.58393,0.20794,0.67658,0.4671,0.6731,0.76476,0.40783,0.61525,0.45472,0.38709,0.5596,0.67775,0.55243,0.43269,0.50067,0.94322,0.43693,0.05002,0.37833,0.27383,NaN,NaN,NaN,NaN,NaN,NaN,0.53716,0.42556
2,145004,5800.16,0.12564,0.55386,0.17792,0.56682,0.25757,0.60019,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14926,0.56204,0.10285,0.81944,NaN,NaN,0.29331,0.488,NaN,NaN,0.35561,0.55254,0.19805,0.452,0.21798,0.81079,NaN,NaN
2,145005,5800.2,0.12564,0.55386,0.1773,0.56621,0.25721,0.60089,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14857,0.56068,0.10231,0.81944,NaN,NaN,0.29272,0.48789,NaN,NaN,0.35532,0.55243,0.19766,0.45237,0.21798,0.81079,NaN,NaN
2,145006,5800.24,0.12564,0.55386,0.17693,0.56675,0.25659,0.60072,0.0988,0.92391,0.21235,0.77391,NaN,NaN,0.14846,0.56017,0.10187,0.8198,NaN,NaN,0.29267,0.48903,NaN,NaN,0.35495,0.55364,0.19754,0.45364,0.21798,0.81079,NaN,NaN""")
base_dir = os.path.dirname(__file__)

serializer = MetricaTrackingSerializer()

data_set = serializer.deserialize(
inputs={
'raw_data_home': raw_data_home,
'raw_data_away': raw_data_away
}
)
with open(f'{base_dir}/files/metrica_home.csv', 'rb') as raw_data_home, \
open(f'{base_dir}/files/metrica_away.csv', 'rb') as raw_data_away:
data_set = serializer.deserialize(
inputs={
'raw_data_home': raw_data_home,
'raw_data_away': raw_data_away
}
)

assert len(data_set.records) == 6
assert len(data_set.periods) == 2
Expand Down
Loading

0 comments on commit 1e32fdb

Please sign in to comment.