Skip to content

Commit

Permalink
GH-41910: [Python] Add support for Pyodide (#37822)
Browse files Browse the repository at this point in the history
pyarrow knows about ARROW_ENABLE_THREADING and doesn't use threads if they are not enabled in libarrow.

Split from #37696 

* GitHub Issue: #41910

Lead-authored-by: Joe Marshall <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Co-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
4 people authored Jul 5, 2024
1 parent 5b5c164 commit 2de8008
Show file tree
Hide file tree
Showing 42 changed files with 940 additions and 65 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ repos:
# files: (/Dockerfile|\.dockerfile)$
files: >-
(
?^ci/docker/conda-python-emscripten\.dockerfile$|
?^ci/docker/python-wheel-windows-test-vs2019\.dockerfile$|
)
types: []
Expand Down
64 changes: 64 additions & 0 deletions ci/docker/conda-python-emscripten.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Image used to build pyarrow for Pyodide (Emscripten) and to run its tests
# in node and Chrome. It extends the conda-python image selected by
# repo / arch / python.
ARG repo
ARG arch
ARG python="3.12"
FROM ${repo}:${arch}-conda-python-${python}

# Tool versions; each can be overridden with --build-arg.
ARG selenium_version="4.15.2"
ARG pyodide_version="0.26.0"
ARG chrome_version="latest"
# Python tuple literal that is compared against sys.version_info below.
ARG required_python_min="(3,12)"
# fail if python version < required_python_min
RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)"

# needs to be a login shell so ~/.profile is read;
# all shell options come before -c so that the RUN command string
# directly follows -c
SHELL ["/bin/bash", "--login", "-o", "pipefail", "-c"]

# install selenium and pyodide-build
RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \
    python -m pip install --no-cache-dir --upgrade pyodide-build==${pyodide_version}

# install pyodide dist directory to /pyodide
RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \
    wget -q "${pyodide_dist_url}" -O- | tar -xj -C /

# install correct version of emscripten for this pyodide
COPY ci/scripts/install_emscripten.sh /arrow/ci/scripts/
RUN bash /arrow/ci/scripts/install_emscripten.sh ~ /pyodide

# make sure zlib is cached in the EMSDK folder
RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib

# install node 20 (needed for async call support),
# pthread-stubs and make for the build, and unzip needed for the chrome
# install to work.
# --yes keeps the build non-interactive regardless of the base image's
# conda configuration.
RUN conda install --yes nodejs=20 unzip pthread-stubs make -c conda-forge

# install chrome for testing browser based runner
COPY ci/scripts/install_chromedriver.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_chromedriver.sh "${chrome_version}"

# make the version of make that is installed by conda be available everywhere
# or else pyodide's isolated build fails to find it
RUN ln -s "$(type -P make)" /bin/make

# Defaults consumed by the Arrow CI build scripts run in this image.
ENV ARROW_BUILD_TESTS="OFF" \
    ARROW_BUILD_TYPE="release" \
    ARROW_DEPENDENCY_SOURCE="BUNDLED" \
    ARROW_EMSCRIPTEN="ON"
6 changes: 5 additions & 1 deletion ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ if [ -x "$(command -v git)" ]; then
fi

# TODO(kszucs): consider to move these to CMake
if [ ! -z "${CONDA_PREFIX}" ]; then
if [ ! -z "${CONDA_PREFIX}" ] && [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ]; then
echo -e "===\n=== Conda environment for build\n==="
conda list

Expand Down Expand Up @@ -99,6 +99,10 @@ if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
fi
n_jobs=2 # Emscripten build fails on docker unless this is set really low
source ~/emsdk/emsdk_env.sh
export CMAKE_INSTALL_PREFIX=$(em-config CACHE)/sysroot
# conda sets LDFLAGS / CFLAGS etc. which break
# emcmake so we unset them
unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS
emcmake cmake \
--preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \
-DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
Expand Down
44 changes: 44 additions & 0 deletions ci/scripts/install_chromedriver.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Install Chrome and Chromedriver for Selenium

# Usage: install_chromedriver.sh [chrome_version]
#   chrome_version: "latest" (default) or a value that is appended to
#                   LATEST_RELEASE_ to look up the full version number.
#
# NOTE(review): the download URLs below are hard-coded to amd64 / linux64
# artifacts.

set -e

# Fall back to "latest" when no argument is given; the original unquoted test
# raised a "unary operator expected" error in that case.
chrome_version="${1:-latest}"

if [ "${chrome_version}" = "latest" ]; then
  latest_release_path=LATEST_RELEASE_STABLE
else
  latest_release_path="LATEST_RELEASE_${chrome_version}"
fi

# Resolve the full version string, then derive both download URLs from it so
# that Chrome and chromedriver always match.
CHROME_VERSION_FULL=$(wget -q --no-verbose -O - "https://googlechromelabs.github.io/chrome-for-testing/${latest_release_path}")
CHROME_DOWNLOAD_URL="https://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_${CHROME_VERSION_FULL}-1_amd64.deb"
CHROMEDRIVER_DOWNLOAD_URL="https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION_FULL}/linux64/chromedriver-linux64.zip"

# Install Chrome from the downloaded package (apt-get resolves its
# dependencies), then drop the package and the apt lists again.
wget -q --no-verbose -O /tmp/google-chrome.deb "${CHROME_DOWNLOAD_URL}"
apt-get update
# apt-get rather than apt: apt's command line interface is not meant for scripts
apt-get install -qqy /tmp/google-chrome.deb
rm -f /tmp/google-chrome.deb
rm -rf /var/lib/apt/lists/*

# Install the matching chromedriver under /opt and expose it on PATH.
wget --no-verbose -O /tmp/chromedriver-linux64.zip "${CHROMEDRIVER_DOWNLOAD_URL}"
unzip /tmp/chromedriver-linux64.zip -d /opt/
rm /tmp/chromedriver-linux64.zip
ln -fs /opt/chromedriver-linux64/chromedriver /usr/local/bin/chromedriver

echo "Using Chrome version: $(google-chrome --version)"
echo "Using Chrome Driver version: $(chromedriver --version)"
36 changes: 36 additions & 0 deletions ci/scripts/install_emscripten.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# install emscripten sdk version to match pyodide in $2 to directory $1/emsdk

# Usage: install_emscripten.sh <target_path> <pyodide_path>
#   target_path:  directory in which the emsdk checkout is created
#   pyodide_path: pyodide distribution directory containing the `python`
#                 launcher that reports the emscripten version it was built with

set -e

# Abort with a usage message when an argument is missing; previously an empty
# target_path made `cd` silently switch to $HOME.
target_path="${1:?usage: install_emscripten.sh <target_path> <pyodide_path>}"
pyodide_path="${2:?usage: install_emscripten.sh <target_path> <pyodide_path>}"

# Ask pyodide's own interpreter for its emscripten version, e.g. "x.y.z".
emscripten_version=$("${pyodide_path}/python" -c "import sys;print(*sys._emscripten_info.emscripten_version,sep='.')")

cd "${target_path}"
# Reuse an existing checkout so the script can be re-run.
if [ ! -d emsdk ]; then
  git clone https://github.com/emscripten-core/emsdk.git
fi
cd emsdk
./emsdk install "${emscripten_version}"
./emsdk activate "${emscripten_version}"
echo "Installed emsdk to: ${target_path}"
40 changes: 40 additions & 0 deletions ci/scripts/python_build_emscripten.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Usage: python_build_emscripten.sh <arrow_dir> <build_dir>
#   arrow_dir: root of the Arrow source checkout
#   build_dir: build root; the python sources are copied to <build_dir>/python
#              and the wheel is built there with `pyodide build`

set -ex

# Both arguments are mandatory: with an empty build_dir the `rm -rf` below
# would target /python.
arrow_dir="${1:?usage: python_build_emscripten.sh <arrow_dir> <build_dir>}"
build_dir="${2:?usage: python_build_emscripten.sh <arrow_dir> <build_dir>}"


source ~/emsdk/emsdk_env.sh

source_dir="${arrow_dir}/python"
python_build_dir="${build_dir}/python"

# Start from a fresh copy of the sources; -L follows symlinks so the build
# directory contains regular files.
rm -rf "${python_build_dir}"
cp -aL "${source_dir}" "${python_build_dir}"

# conda sets LDFLAGS / CFLAGS etc. which break
# emcmake so we unset them
unset LDFLAGS CFLAGS CXXFLAGS CPPFLAGS

pushd "${python_build_dir}"
pyodide build
popd
38 changes: 38 additions & 0 deletions ci/scripts/python_test_emscripten.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# run tests against Chrome and node.js as representative
# WebAssembly platforms (i.e. one browser, one non-browser).

# Usage: python_test_emscripten.sh <build_dir> <pyodide_dist_dir>
#   build_dir:        build root; the wheel is looked up in <build_dir>/python/dist
#   pyodide_dist_dir: pyodide distribution directory passed to the test runner

set -ex

build_dir="${1}/python"
pyodide_dist_dir="${2}"

cd "${build_dir}"

# note: this uses the newest wheel in dist
pyodide_wheel=$(ls -t dist/pyarrow*.whl | head -1)

# The pipeline above succeeds even when no wheel exists (its status is that
# of `head`), so check explicitly instead of running the tests with an empty
# wheel argument.
if [ -z "${pyodide_wheel}" ]; then
  echo "No pyarrow wheel found in ${build_dir}/dist" >&2
  exit 1
fi

echo "-------------- Running emscripten tests in Node ----------------------"
python scripts/run_emscripten_tests.py "${pyodide_wheel}" --dist-dir="${pyodide_dist_dir}" --runtime=node

echo "-------------- Running emscripten tests in Chrome --------------------"
python scripts/run_emscripten_tests.py "${pyodide_wheel}" --dist-dir="${pyodide_dist_dir}" --runtime=chrome

2 changes: 2 additions & 0 deletions cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,15 @@
"ARROW_ACERO": "ON",
"ARROW_BUILD_SHARED": "OFF",
"ARROW_BUILD_STATIC": "ON",
"ARROW_CSV": "ON",
"ARROW_CUDA": "OFF",
"ARROW_DEPENDENCY_SOURCE": "BUNDLED",
"ARROW_DEPENDENCY_USE_SHARED": "OFF",
"ARROW_ENABLE_THREADING": "OFF",
"ARROW_FLIGHT": "OFF",
"ARROW_IPC": "ON",
"ARROW_JEMALLOC": "OFF",
"ARROW_JSON": "ON",
"ARROW_MIMALLOC": "OFF",
"ARROW_ORC": "ON",
"ARROW_RUNTIME_SIMD_LEVEL": "NONE",
Expand Down
1 change: 1 addition & 0 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4551,6 +4551,7 @@ macro(build_orc)
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_PROTOBUF_PROTOC}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
LZ4::lz4
Expand Down
9 changes: 9 additions & 0 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,15 @@ tasks:
image: conda-python
{% endfor %}

test-conda-python-emscripten:
  # Runs the conda-python-emscripten docker image on GitHub.
  ci: github
  template: docker-tests/github.linux.yml
  params:
    env:
      # Version numbers are quoted so YAML keeps them as strings instead of
      # parsing them as floats (which would turn e.g. 3.10 into 3.1).
      UBUNTU: "22.04"
      PYTHON: "3.12"
    image: conda-python-emscripten

test-conda-python-3.11-hypothesis:
ci: github
template: docker-tests/github.linux.yml
Expand Down
33 changes: 33 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ x-hierarchy:
- conda-python-docs
- conda-python-cython2
- conda-python-dask
- conda-python-emscripten
- conda-python-hdfs
- conda-python-java-integration
- conda-python-jpype
Expand Down Expand Up @@ -875,6 +876,38 @@ services:
/arrow/ci/scripts/python_build.sh /arrow /build &&
/arrow/ci/scripts/python_test.sh /arrow"]

conda-python-emscripten:
# Builds Arrow C++ via cpp_build.sh, builds pyarrow via
# python_build_emscripten.sh and runs python_test_emscripten.sh against the
# /pyodide distribution (see `command` below).
# Usage:
# docker-compose build conda-python-emscripten
# docker-compose run --rm conda-python-emscripten
# Parameters:
# ARCH: amd64, arm64v8, ...
# UBUNTU: 22.04
# PYTHON: selects the conda-python base image (see cache_from and args.python)
# NOTE(review): ci/scripts/install_chromedriver.sh downloads amd64 / linux64
# artifacts only, so ARCH values other than amd64 presumably do not work here
# - confirm.
image: ${REPO}:${ARCH}-conda-python-emscripten
build:
context: .
dockerfile: ci/docker/conda-python-emscripten.dockerfile
cache_from:
- ${REPO}:${ARCH}-conda-python-${PYTHON}
args:
repo: ${REPO}
arch: ${ARCH}
# NOTE(review): clang_tools and llvm are not declared as ARG in
# ci/docker/conda-python-emscripten.dockerfile; presumably unused - confirm.
clang_tools: ${CLANG_TOOLS}
llvm: ${LLVM}
# The values below override the ARG defaults in the dockerfile; note that
# chrome_version is pinned here while the dockerfile defaults to "latest".
pyodide_version: "0.26.0"
chrome_version: "122"
selenium_version: "4.15.2"
required_python_min: "(3,12)"
python: ${PYTHON}
shm_size: *shm-size
volumes: *ubuntu-volumes
environment:
<<: [*common, *ccache, *sccache, *cpp]
# Arguments: cpp_build.sh and python_build_emscripten.sh get the source dir
# /arrow and build dir /build; python_test_emscripten.sh gets the build dir
# and the pyodide distribution dir.
command: ["
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build_emscripten.sh /arrow /build &&
/arrow/ci/scripts/python_test_emscripten.sh /build /pyodide"]

ubuntu-cuda-python:
# Usage:
# docker-compose build cuda-cpp
Expand Down
Loading

0 comments on commit 2de8008

Please sign in to comment.