-
Notifications
You must be signed in to change notification settings - Fork 36
154 lines (148 loc) · 5.02 KB
/
build.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Workflow identity and triggers.
name: build

on:
  # Pushes are only built for the two long-lived branches.
  push:
    branches:
      - master
      - develop
  # No branch/type filters are configured for pull requests.
  pull_request:
jobs:
  # Lint the code base and run every test not marked "spark", once per
  # Python version listed in the matrix.
  test:
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Quoted so that e.g. 3.10 is not parsed as the float 3.1.
        python: ['3.9', '3.10', '3.11', '3.12']
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          # The interpreter version is part of the key so each matrix leg keeps
          # its own cache; the restore keys fall back to progressively broader
          # prefixes when pyproject.toml has changed.
          key: ${{ runner.os }}-pip-${{ matrix.python }}-${{ hashFiles('**/pyproject.toml') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python }}-
            ${{ runner.os }}-pip-
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Quoted: an unquoted .[test] is a shell glob pattern.
          pip install -e ".[test]"
      - name: Lint with pre-commit
        run: |
          make lint
      - name: Test with pytest
        run: |
          pytest -m "not spark"

  # Run the tests marked "spark" against a downloaded Spark distribution.
  # Each matrix entry pins one Spark/Hadoop/Java/Python combination together
  # with the pandas/numpy constraints installed alongside it.
  test_spark:
    strategy:
      matrix:
        include:
          - SPARK_VERSION: "3.3.2"
            HADOOP_VERSION: "3"
            JAVA_VERSION: "11"
            python: "3.9"
            os: ubuntu-latest
            dependency_constraints: '"pandas<2" "numpy<2"'
          - SPARK_VERSION: "3.5.4"
            HADOOP_VERSION: "3"
            JAVA_VERSION: "11"
            python: "3.12"
            os: ubuntu-latest
            dependency_constraints: '"pandas>=2" "numpy>=2"'
    runs-on: ${{ matrix.os }}
    name: ${{ matrix.os }}, Spark ${{ matrix.SPARK_VERSION }}, Python ${{ matrix.python }}
    # Shared by the download, install and test steps; defined once here so the
    # paths and versions cannot drift apart between steps.
    env:
      BUILD_DIR: "/home/runner/work/"
      SPARK_HOME: "/home/runner/work/spark/"
      SPARK_VERSION: "${{ matrix.SPARK_VERSION }}"
      HADOOP_VERSION: "${{ matrix.HADOOP_VERSION }}"
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python }}
      - name: Cache pip + Spark
        id: cache-spark
        uses: actions/cache@v4
        with:
          path: |
            ${{ env.BUILD_DIR }}spark.tgz
            ~/.cache/pip
          key: ${{ runner.os }}-spark-${{ matrix.SPARK_VERSION }}-hadoop${{ matrix.HADOOP_VERSION }}-java${{ matrix.JAVA_VERSION }}-${{ hashFiles('**/pyproject.toml') }}
      - name: Install pip and setuptools
        run: |
          python -m pip install --upgrade pip setuptools
      - name: Download spark
        # Skipped when the cache step restored an exact key match.
        if: steps.cache-spark.outputs.cache-hit != 'true'
        run: |
          # --fail makes an HTTP error abort the step instead of saving the
          # server's error page as spark.tgz; --location follows redirects.
          curl --fail --location --silent --show-error --retry 3 \
            --output "${BUILD_DIR}spark.tgz" \
            "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
      - name: Install Spark
        run: |
          sudo apt-get update
          sudo apt-get -y install openjdk-${{ matrix.JAVA_VERSION }}-jdk
          # Unpack next to the archive (not into the checkout), then rename the
          # versioned directory to the fixed SPARK_HOME location.
          tar -xzf "${BUILD_DIR}spark.tgz" -C "${BUILD_DIR}"
          mv "${BUILD_DIR}spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" "${SPARK_HOME}"
          pip install "pytest-spark>=0.6.0" "pyarrow>=0.8.0"
          # https://github.com/python-poetry/poetry/issues/6792
          pip install "pypandoc<1.8"
          pip install "pyspark==${SPARK_VERSION}"
      - name: Install Spark-related dependency versions
        run: |
          pip install ${{ matrix.dependency_constraints }}
      - name: Install project dependencies
        run: |
          pip install -e ".[test]"
      - name: Test with pytest (spark-specific)
        env:
          JAVA_HOME: "/usr/lib/jvm/java-${{ matrix.JAVA_VERSION }}-openjdk-amd64"
          SPARK_LOCAL_IP: "localhost"
        run: |
          pytest -m spark

  # Execute the example scripts and publish the HTML reports they write as
  # build artifacts. Only runs after both test jobs have succeeded.
  examples:
    runs-on: ubuntu-latest
    needs:
      - test
      - test_spark
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.9
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string rather than a YAML float.
          python-version: "3.9"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install scikit-learn
      - name: Download datasets
        run: |
          wget https://www.win.tue.nl/~mpechen/data/DriftSets/hyperplane1.arff -O examples/synthetic_data_streams/data/hyperplane1.arff
      - name: Build examples
        run: |
          cd examples
          python synthetic_data.py
          python flight_delays.py
          cd synthetic_data_streams
          python hyperplane.py
      # if-no-files-found: error turns a missing report into a job failure.
      - uses: actions/upload-artifact@v4
        with:
          name: synthetic-report
          path: examples/test_data_report.html
          if-no-files-found: error
      - uses: actions/upload-artifact@v4
        with:
          name: flight-delays-report
          path: examples/flight_delays_report.html
          if-no-files-found: error
      - uses: actions/upload-artifact@v4
        with:
          name: hyperplane-1-report
          path: examples/synthetic_data_streams/reports/hyperplane_1.html
          if-no-files-found: error