From 788c8f2c70a0460895a7811f529241343a65600e Mon Sep 17 00:00:00 2001 From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com> Date: Wed, 10 Jul 2024 17:41:25 -0400 Subject: [PATCH] GH-43186: [Go] Use auto-aligned atomic int64 for pqarrow pathbuilders (#43206) ### Rationale for this change Fixes: #43186 Improves 32-bit support for the pqarrow writer. We may want to push similar changes to other refcount implementations to more completely support running on 32-bit system. ### What changes are included in this PR? Update `pathBuilder` and `multipathLevelBuilder` refCounts to use `atomic.Int64` which is automatically aligned on 32-bit systems. ### Are these changes tested? The issue reproduces with existing tests when building for a 32-bit architecture, so no new tests were added. This PR adds a "test" step to the existing workflow for 386 architecture builds, currently limited to run the tests fixed in this PR. ### Are there any user-facing changes? 32-bit systems should no longer panic when writing parquet files. * GitHub Issue: #43186 --- .github/workflows/go.yml | 13 ++++++----- go/internal/utils/ref_count.go | 26 ++++++++++++++++++++++ go/parquet/internal/bmi/bitmap_bmi2_386.go | 25 +++++++++++++++++++++ go/parquet/pqarrow/path_builder.go | 17 +++++++------- 4 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 go/internal/utils/ref_count.go create mode 100644 go/parquet/internal/bmi/bitmap_bmi2_386.go diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 0d32628859fa0..c247a89128b34 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -168,8 +168,8 @@ jobs: python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python python3 ci/scripts/go_bench_adapt.py - build386: - name: Go Cross-build for 386 + build_test_386: + name: Go Cross-build and test for 386 runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 20 @@ -188,9 +188,12 @@ jobs: cache: true cache-dependency-path: go/go.sum - name: Run build - run: | - cd go - GOARCH=386 go build ./... + run: GOARCH=386 go build ./... + working-directory: ./go + - name: Run test + # WIP refactor, only tests in the specified dirs have been fixed + run: GOARCH=386 go test ./parquet/file/... + working-directory: ./go docker_cgo: name: AMD64 Debian 12 Go ${{ matrix.go }} - CGO diff --git a/go/internal/utils/ref_count.go b/go/internal/utils/ref_count.go new file mode 100644 index 0000000000000..9b85f75b14363 --- /dev/null +++ b/go/internal/utils/ref_count.go @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import "sync/atomic" + +// NewRefCount creates a new atomic counter set to the specified initial value. +func NewRefCount(initial int64) *atomic.Int64 { + var val atomic.Int64 + val.Store(initial) + return &val +} diff --git a/go/parquet/internal/bmi/bitmap_bmi2_386.go b/go/parquet/internal/bmi/bitmap_bmi2_386.go new file mode 100644 index 0000000000000..60f898f6bd557 --- /dev/null +++ b/go/parquet/internal/bmi/bitmap_bmi2_386.go @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !noasm +// +build !noasm + +package bmi + +func init() { + funclist.extractBits = extractBitsGo + funclist.gtbitmap = greaterThanBitmapGo +} diff --git a/go/parquet/pqarrow/path_builder.go b/go/parquet/pqarrow/path_builder.go index 13f2beca024f1..c28072afe2c24 100644 --- a/go/parquet/pqarrow/path_builder.go +++ b/go/parquet/pqarrow/path_builder.go @@ -25,6 +25,7 @@ import ( "github.com/apache/arrow/go/v17/arrow/array" "github.com/apache/arrow/go/v17/arrow/memory" "github.com/apache/arrow/go/v17/internal/bitutils" + "github.com/apache/arrow/go/v17/internal/utils" "github.com/apache/arrow/go/v17/parquet/internal/encoding" "golang.org/x/xerrors" ) @@ -301,15 +302,15 @@ type pathBuilder struct { paths []pathInfo nullableInParent bool - refCount int64 + refCount *atomic.Int64 } func (p *pathBuilder) Retain() { - atomic.AddInt64(&p.refCount, 1) + p.refCount.Add(1) } func (p *pathBuilder) Release() { - if atomic.AddInt64(&p.refCount, -1) == 0 { + if p.refCount.Add(-1) == 0 { for idx := range p.paths { p.paths[idx].primitiveArr.Release() p.paths[idx].primitiveArr = nil @@ -498,15 +499,15 @@ type multipathLevelBuilder struct { data arrow.ArrayData builder pathBuilder - refCount int64 + refCount *atomic.Int64 } func (m *multipathLevelBuilder) Retain() { - atomic.AddInt64(&m.refCount, 1) + m.refCount.Add(1) } func (m *multipathLevelBuilder) Release() { - if atomic.AddInt64(&m.refCount, -1) == 0 { + if m.refCount.Add(-1) == 0 { m.data.Release() m.data = nil m.builder.Release() @@ -516,10 +517,10 @@ func (m *multipathLevelBuilder) Release() { func newMultipathLevelBuilder(arr arrow.Array, fieldNullable bool) (*multipathLevelBuilder, error) { ret := &multipathLevelBuilder{ - refCount: 1, + refCount: utils.NewRefCount(1), rootRange: elemRange{int64(0), int64(arr.Data().Len())}, data: arr.Data(), - builder: pathBuilder{nullableInParent: fieldNullable, paths: make([]pathInfo, 0), refCount: 1}, + builder: pathBuilder{nullableInParent: fieldNullable, paths: make([]pathInfo, 0), refCount: utils.NewRefCount(1)}, } if err := ret.builder.Visit(arr); err != nil { return nil, err