From f5157cff9e7f91f22ced9b9a36b7c00844ce4d69 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Jun 2024 09:52:32 -0300 Subject: [PATCH 01/10] try specifying via env and not via command line --- .github/workflows/integration.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f53f4aeb505d2..c210e26fe61db 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -99,11 +99,11 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + ARCHERY_INTEGRATION_WITH_NANOARROW: "1" + ARCHERY_INTEGRATION_WITH_RUST: "1" run: > archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ - -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ - -e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration - name: Docker Push if: >- From 365a4a6bb4b99290c7bcb05a8761427701b427ba Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Jun 2024 10:19:50 -0300 Subject: [PATCH 02/10] one more approach --- .github/workflows/integration.yml | 2 -- ci/scripts/nanoarrow_build.sh | 8 +++----- ci/scripts/rust_build.sh | 8 +++----- docker-compose.yml | 2 -- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index c210e26fe61db..6f2db48cfc11d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -99,8 +99,6 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - ARCHERY_INTEGRATION_WITH_NANOARROW: "1" - ARCHERY_INTEGRATION_WITH_RUST: "1" run: > archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ diff --git a/ci/scripts/nanoarrow_build.sh b/ci/scripts/nanoarrow_build.sh index 1612b9a2d0102..09a2e5842790c 100755 --- a/ci/scripts/nanoarrow_build.sh +++ b/ci/scripts/nanoarrow_build.sh @@ -29,16 +29,14 @@ build_dir=${2}/nanoarrow if [ "${ARCHERY_INTEGRATION_WITH_NANOARROW}" -eq "0" ]; then echo "=====================================================================" - echo "Not building nanoarrow" + echo "Not building nanoarrow because ARCHERY_INTEGRATION_WITH_NANOARROW=0" echo "=====================================================================" exit 0; elif [ ! -d "${source_dir}" ]; then echo "=====================================================================" - echo "The nanoarrow source is missing. Please clone the arrow-nanoarrow repository" - echo "to arrow/nanoarrow before running the integration tests:" - echo " git clone https://github.com/apache/arrow-nanoarrow.git path/to/arrow/nanoarrow" + echo "Not building nanoarrow because '${source_dir}' does not exist" echo "=====================================================================" - exit 1; + exit 0; fi set -x diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh index 5fc21d454b080..299d3a14c8f73 100755 --- a/ci/scripts/rust_build.sh +++ b/ci/scripts/rust_build.sh @@ -35,16 +35,14 @@ export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data if [ "${ARCHERY_INTEGRATION_WITH_RUST}" -eq "0" ]; then echo "=====================================================================" - echo "Not building the Rust implementation." + echo "Not building Rust because ARCHERY_INTEGRATION_WITH_RUST=0" echo "=====================================================================" exit 0; elif [ ! -d "${source_dir}" ]; then echo "=====================================================================" - echo "The Rust source is missing. Please clone the arrow-rs repository" - echo "to arrow/rust before running the integration tests:" - echo " git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust" + echo "Not building Rust because '${source_dir}' does not exist" echo "=====================================================================" - exit 1; + exit 0; fi set -x diff --git a/docker-compose.yml b/docker-compose.yml index fa248d59037d3..d0a2eac75044f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1782,8 +1782,6 @@ services: volumes: *conda-volumes environment: <<: [*common, *ccache] - ARCHERY_INTEGRATION_WITH_NANOARROW: 0 - ARCHERY_INTEGRATION_WITH_RUST: 0 # Tell Archery where Arrow binaries are located ARROW_CPP_EXE_PATH: /build/cpp/debug ARROW_NANOARROW_PATH: /build/nanoarrow From e9c0994275e902a04dc8e447843429c8e4a99f88 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Jun 2024 10:27:45 -0300 Subject: [PATCH 03/10] ensure int expression --- ci/scripts/nanoarrow_build.sh | 5 +++++ ci/scripts/rust_build.sh | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/ci/scripts/nanoarrow_build.sh b/ci/scripts/nanoarrow_build.sh index 09a2e5842790c..a9001a877874f 100755 --- a/ci/scripts/nanoarrow_build.sh +++ b/ci/scripts/nanoarrow_build.sh @@ -27,6 +27,11 @@ build_dir=${2}/nanoarrow # integration tests. Testing of the nanoarrow implementation in normal CI is handled # by github workflows in the arrow-nanoarrow repository. +# Include in the integration test by default if the source directory is present +if [ -z "${ARCHERY_INTEGRATION_WITH_NANOARROW}" ]; then + ARCHERY_INTEGRATION_WITH_NANOARROW="1" +fi + if [ "${ARCHERY_INTEGRATION_WITH_NANOARROW}" -eq "0" ]; then echo "=====================================================================" echo "Not building nanoarrow because ARCHERY_INTEGRATION_WITH_NANOARROW=0" diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh index 299d3a14c8f73..abe9389c32c73 100755 --- a/ci/scripts/rust_build.sh +++ b/ci/scripts/rust_build.sh @@ -33,6 +33,11 @@ export RUSTFLAGS="-C debuginfo=1" export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data +# Include in the integration test by default if the source directory is present +if [ -z "${ARCHERY_INTEGRATION_WITH_RUST}" ]; then + ARCHERY_INTEGRATION_WITH_RUST="1" +fi + if [ "${ARCHERY_INTEGRATION_WITH_RUST}" -eq "0" ]; then echo "=====================================================================" echo "Not building Rust because ARCHERY_INTEGRATION_WITH_RUST=0" From 530d5cee6ad916c0216cde1baebfcce616118609 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Jun 2024 10:36:08 -0300 Subject: [PATCH 04/10] maybe fix rust version --- ci/docker/conda-integration.dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 30b9cd5199fab..beeff0927d30c 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -44,8 +44,9 @@ RUN mamba install -q -y \ # Install Rust with only the needed components # (rustfmt is needed for tonic-build to compile the protobuf definitions) +# GH-41637: Version pinned at 1.77 because the glibc for conda-cpp is currently too old RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \ - $HOME/.cargo/bin/rustup toolchain install stable && \ + $HOME/.cargo/bin/rustup toolchain install 1.77 && \ $HOME/.cargo/bin/rustup component add rustfmt ENV GOROOT=/opt/go \ From 82f6eea123a999a684a78f8970fc4041e42f4650 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Jun 2024 10:52:01 -0300 Subject: [PATCH 05/10] maybe use the old one by default --- ci/docker/conda-integration.dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index beeff0927d30c..78d2503b23df7 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -46,6 +46,7 @@ RUN mamba install -q -y \ # (rustfmt is needed for tonic-build to compile the protobuf definitions) # GH-41637: Version pinned at 1.77 because the glibc for conda-cpp is currently too old RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \ + $HOME/.cargo/bin/rustup override set 1.77 && \ $HOME/.cargo/bin/rustup toolchain install 1.77 && \ $HOME/.cargo/bin/rustup component add rustfmt From 8b2f972ecfbdd59d62a4c3b78e5b6b25d1ac0935 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 6 Jun 2024 12:19:20 -0300 Subject: [PATCH 06/10] try specifying --- .github/workflows/integration.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6f2db48cfc11d..f53f4aeb505d2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -102,6 +102,8 @@ jobs: run: > archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ + -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ + -e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration - name: Docker Push if: >- From d71259c2c5ca899a5c23e92fbc9486243e133cb1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 11 Jun 2024 10:31:24 -0300 Subject: [PATCH 07/10] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- ci/scripts/nanoarrow_build.sh | 2 +- ci/scripts/rust_build.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/nanoarrow_build.sh b/ci/scripts/nanoarrow_build.sh index a9001a877874f..7b39543cb6e0c 100755 --- a/ci/scripts/nanoarrow_build.sh +++ b/ci/scripts/nanoarrow_build.sh @@ -28,7 +28,7 @@ build_dir=${2}/nanoarrow # by github workflows in the arrow-nanoarrow repository. # Include in the integration test by default if the source directory is present -if [ -z "${ARCHERY_INTEGRATION_WITH_NANOARROW}" ]; then +if [ -z "${ARCHERY_INTEGRATION_WITH_NANOARROW}" ]; then ARCHERY_INTEGRATION_WITH_NANOARROW="1" fi diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh index abe9389c32c73..a883f003ace32 100755 --- a/ci/scripts/rust_build.sh +++ b/ci/scripts/rust_build.sh @@ -34,7 +34,7 @@ export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data # Include in the integration test by default if the source directory is present -if [ -z "${ARCHERY_INTEGRATION_WITH_RUST}" ]; then +if [ -z "${ARCHERY_INTEGRATION_WITH_RUST}" ]; then ARCHERY_INTEGRATION_WITH_RUST="1" fi From e4b1fa1821a6f444594f4f5ca9544b4f31ce0303 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 17 Jun 2024 12:10:33 -0300 Subject: [PATCH 08/10] revert changes --- ci/scripts/nanoarrow_build.sh | 13 +++++-------- ci/scripts/rust_build.sh | 13 +++++-------- docker-compose.yml | 2 ++ 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/ci/scripts/nanoarrow_build.sh b/ci/scripts/nanoarrow_build.sh index 7b39543cb6e0c..1612b9a2d0102 100755 --- a/ci/scripts/nanoarrow_build.sh +++ b/ci/scripts/nanoarrow_build.sh @@ -27,21 +27,18 @@ build_dir=${2}/nanoarrow # integration tests. Testing of the nanoarrow implementation in normal CI is handled # by github workflows in the arrow-nanoarrow repository. -# Include in the integration test by default if the source directory is present -if [ -z "${ARCHERY_INTEGRATION_WITH_NANOARROW}" ]; then - ARCHERY_INTEGRATION_WITH_NANOARROW="1" -fi - if [ "${ARCHERY_INTEGRATION_WITH_NANOARROW}" -eq "0" ]; then echo "=====================================================================" - echo "Not building nanoarrow because ARCHERY_INTEGRATION_WITH_NANOARROW=0" + echo "Not building nanoarrow" echo "=====================================================================" exit 0; elif [ ! -d "${source_dir}" ]; then echo "=====================================================================" - echo "Not building nanoarrow because '${source_dir}' does not exist" + echo "The nanoarrow source is missing. Please clone the arrow-nanoarrow repository" + echo "to arrow/nanoarrow before running the integration tests:" + echo " git clone https://github.com/apache/arrow-nanoarrow.git path/to/arrow/nanoarrow" echo "=====================================================================" - exit 0; + exit 1; fi set -x diff --git a/ci/scripts/rust_build.sh b/ci/scripts/rust_build.sh index a883f003ace32..5fc21d454b080 100755 --- a/ci/scripts/rust_build.sh +++ b/ci/scripts/rust_build.sh @@ -33,21 +33,18 @@ export RUSTFLAGS="-C debuginfo=1" export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data -# Include in the integration test by default if the source directory is present -if [ -z "${ARCHERY_INTEGRATION_WITH_RUST}" ]; then - ARCHERY_INTEGRATION_WITH_RUST="1" -fi - if [ "${ARCHERY_INTEGRATION_WITH_RUST}" -eq "0" ]; then echo "=====================================================================" - echo "Not building Rust because ARCHERY_INTEGRATION_WITH_RUST=0" + echo "Not building the Rust implementation." echo "=====================================================================" exit 0; elif [ ! -d "${source_dir}" ]; then echo "=====================================================================" - echo "Not building Rust because '${source_dir}' does not exist" + echo "The Rust source is missing. Please clone the arrow-rs repository" + echo "to arrow/rust before running the integration tests:" + echo " git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust" echo "=====================================================================" - exit 0; + exit 1; fi set -x diff --git a/docker-compose.yml b/docker-compose.yml index d0a2eac75044f..fa248d59037d3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1782,6 +1782,8 @@ services: volumes: *conda-volumes environment: <<: [*common, *ccache] + ARCHERY_INTEGRATION_WITH_NANOARROW: 0 + ARCHERY_INTEGRATION_WITH_RUST: 0 # Tell Archery where Arrow binaries are located ARROW_CPP_EXE_PATH: /build/cpp/debug ARROW_NANOARROW_PATH: /build/nanoarrow From 1c1b7c1c37bbf62fb0538728e330e7c410095ea8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 17 Jun 2024 12:15:04 -0300 Subject: [PATCH 09/10] apply kou's suggestion --- dev/archery/archery/docker/core.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index cb831060022a4..ec422e9aaf280 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -340,18 +340,9 @@ def run(self, service_name, command=None, *, env=None, volumes=None, service = self.config.get(service_name) args = [] - if user is not None: - args.extend(['-u', user]) - - if env is not None: - for k, v in env.items(): - args.extend(['-e', '{}={}'.format(k, v)]) - if volumes is not None: - for volume in volumes: - args.extend(['--volume', volume]) - - if self.config.using_docker or service['need_gpu'] or resource_limit: + use_docker = self.config.using_docker or service['need_gpu'] or resource_limit + if use_docker: # use gpus, requires docker>=19.03 if service['need_gpu']: args.extend(['--gpus', 'all']) @@ -396,6 +387,18 @@ def run(self, service_name, command=None, *, env=None, volumes=None, # name which we refer as image in general args.append(service['image']) + if user is not None: + args.extend(['-u', user]) + + if env is not None: + for k, v in env.items(): + args.extend(['-e', '{}={}'.format(k, v)]) + + if volumes is not None: + for volume in volumes: + args.extend(['--volume', volume]) + + if use_docker: # add command from compose if it wasn't overridden if command is not None: args.append(command) From 74b2f96704d100cd2315c817a3d9535aec42f47e Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 18 Jun 2024 15:26:22 -0300 Subject: [PATCH 10/10] try kou's suggestion --- dev/archery/archery/docker/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index ec422e9aaf280..5be4887ea4f63 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -383,10 +383,6 @@ def run(self, service_name, command=None, *, env=None, volumes=None, args.append(f'--memory={memory}') args.append(f'--memory-swap={memory}') - # get the actual docker image name instead of the compose service - # name which we refer as image in general - args.append(service['image']) - if user is not None: args.extend(['-u', user]) @@ -399,6 +395,10 @@ def run(self, service_name, command=None, *, env=None, volumes=None, args.extend(['--volume', volume]) if use_docker: + # get the actual docker image name instead of the compose service + # name which we refer as image in general + args.append(service['image']) + # add command from compose if it wasn't overridden if command is not None: args.append(command)