diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 0000000..41e9d31 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,91 @@ +name: Build and upload releases + +on: + # For pushes to main, build binaries and store them as artifacts (also upload Docker images) + # For pushes to main with tags, also make a GitHub release. + push: + branches: + - main + tags: + - "v*" + +jobs: + build_binary: + name: Build the binaries + runs-on: ${{ matrix.os }} + + strategy: + matrix: + build: [linux-x86_64, macos-arm64] + include: + - build: linux-x86_64 + os: ubuntu-latest + target: x86_64-unknown-linux-gnu + - build: macos-arm64 + os: macos-14 + target: arm64-apple-darwin + + steps: + - name: Checkout the repository + uses: actions/checkout@v4 + + - run: | + rustup toolchain install nightly --profile minimal + rustup default nightly + + - uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: "true" + + - name: Build the release binary + shell: bash + run: | + cargo build --release + + - name: Test invoking the binary + shell: bash + run: | + ./target/release/lakehouse-loader --help || exit 1 + + - name: Prepare artifact name + shell: bash + run: | + echo "ARTIFACT=lakehouse-loader-nightly-${{ matrix.target }}" >> $GITHUB_ENV + echo "SOURCE=target/release/lakehouse-loader" >> $GITHUB_ENV + + - name: Upload binaries as artifacts + uses: actions/upload-artifact@v4 + with: + name: ${{ env.ARTIFACT }} + path: ${{ env.SOURCE }} + + github_release: + name: Perform GitHub release + needs: build_binary + runs-on: ubuntu-latest + if: startsWith(github.event.ref, 'refs/tags/v') + steps: + - name: Get the release version from the tag + shell: bash + if: env.RELEASE_VERSION == '' + run: | + echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV + echo "version is: ${{ env.RELEASE_VERSION }}" + + - name: Get artifacts + uses: actions/download-artifact@v4 + with: + path: artifacts + + - name: Package artifacts + run: | + chmod +x artifacts/lakehouse-loader-nightly-x86_64-unknown-linux-gnu/lakehouse-loader + tar -C artifacts/lakehouse-loader-nightly-x86_64-unknown-linux-gnu -czf lakehouse-loader-${{ env.RELEASE_VERSION }}-x86_64-unknown-linux-gnu.tar.gz lakehouse-loader + tar -C artifacts/lakehouse-loader-nightly-arm64-apple-darwin -czf lakehouse-loader-${{ env.RELEASE_VERSION }}-arm64-apple-darwin.tar.gz lakehouse-loader + + - name: Upload release archive + uses: softprops/action-gh-release@v1 + with: + files: | + lakehouse-loader-${{ env.RELEASE_VERSION }}-x86_64-unknown-linux-gnu.tar.gz + lakehouse-loader-${{ env.RELEASE_VERSION }}-arm64-apple-darwin.tar.gz diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..cfc649e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,54 @@ +name: CI + +on: + push: + +jobs: + CI: + name: Lint, build, test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install pre-commit + run: | + sudo apt-get update + sudo apt-get install -y pre-commit + + - name: Configure pre-commit cache + uses: actions/cache@v4 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ runner.os }}-pre-commit-${{ hashFiles('**/.pre-commit-config.yaml') }} + + - name: Install minimal nightly with clippy and rustfmt + uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + components: rustfmt, clippy + + - name: Install tool for formatting Cargo.toml files + run: cargo install cargo-tomlfmt + + - uses: Swatinem/rust-cache@v2 + with: + cache-on-failure: "true" + + - name: Check pre-commit hooks (formatting and Clippy) + run: pre-commit run --all + + - name: Build in debug mode + run: | + cargo build + + - name: Spin up the test object store and postgres server + run: docker compose up -d --wait || true + + - name: Run tests + run: | + cargo test + env: + AWS_ALLOW_HTTP: "true" + AWS_ENDPOINT: http://localhost:9000 + AWS_ACCESS_KEY_ID: minioadmin + AWS_SECRET_ACCESS_KEY: minioadmin diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ad29be3 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,29 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.7.1 + hooks: + - id: prettier + files: \.(json|markdown|md|yaml|yml)$ + exclude: tests/data + - repo: local + hooks: + - id: fmt + name: fmt + entry: cargo fmt + language: system + types: [rust] + args: ["--"] + + - id: tomlfmt + name: tomlfmt + entry: find . -name 'Cargo.toml' -exec cargo tomlfmt -p {} \; + language: system + pass_filenames: false + + # Run Clippy (lint), which also runs cargo check (check the code compiles) + - id: clippy + name: clippy + entry: ./ci/clippy.sh + language: system + types: [rust] + pass_filenames: false diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..00ff4c2 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,3108 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + +[[package]] +name = "arrow" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" +dependencies = [ + "ahash 0.8.8", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.3", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "base64 0.21.7", + "chrono", + "half", + "lexical-core", + "num", +] + +[[package]] +name = "arrow-csv" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-json" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" +dependencies = [ + "ahash 0.8.8", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown 0.14.3", +] + +[[package]] +name = "arrow-schema" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" +dependencies = [ + "serde", +] + +[[package]] +name = "arrow-select" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" +dependencies = [ + "ahash 0.8.8", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-trait" +version = "0.1.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "backtrace" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "bytemuck" +version = "1.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "serde", + "windows-targets 0.52.0", +] + +[[package]] +name = "clap" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80c21025abd42669a92efc996ef13cfb2c5c627858421ea58d5c3b331a6c134f" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "458bf1f341769dfcf849846f65dffdf9146daa56bcd2a47cb4e1de9915567c99" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "config" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23738e11972c7643e4ec947840fc463b6a571afcd3e735bdfce7d03c7a784aca" +dependencies = [ + "async-trait", + "json5", + "lazy_static", + "nom", + "pathdiff", + "ron", + "rust-ini", + "serde", + "serde_json", + "toml", + "yaml-rust", +] + +[[package]] +name = "const-random" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown 0.14.3", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "deltalake" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b9304c1b0a1dcc3fb4391e7039fa8dea0cebccf0cfe690a081c5fcf7c83c9e" +dependencies = [ + "deltalake-core", +] + +[[package]] +name = "deltalake-core" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fa36b3134e005ea27b129d44416830edf4954beeecfc7933b6fc6e2efa5cc92" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-trait", + "bytes", + "cfg-if", + "chrono", + "dashmap", + "either", + "errno", + "fix-hidden-lifetime-bug", + "futures", + "hashbrown 0.14.3", + "indexmap", + "itertools", + "lazy_static", + "libc", + "maplit", + "num-bigint", + "num-traits", + "num_cpus", + "object_store", + "once_cell", + "parking_lot", + "parquet", + "percent-encoding", + "pin-project-lite", + "rand", + "regex", + "roaring", + "serde", + "serde_json", + "thiserror", + "tokio", + "tracing", + "url", + "uuid", + "z85", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "dlv-list" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" + +[[package]] +name = "encoding_rs" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "env_filter" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05e7cf40684ae96ade6232ed84582f40ce0a66efcd43a5117aef610534f8e0b8" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "finl_unicode" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" + +[[package]] +name = "fix-hidden-lifetime-bug" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4ae9c2016a663983d4e40a9ff967d6dcac59819672f0b47f2b17574e99c33c8" +dependencies = [ + "fix-hidden-lifetime-bug-proc_macros", +] + +[[package]] +name = "fix-hidden-lifetime-bug-proc_macros" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4c81935e123ab0741c4c4f0d9b8377e5fb21d3de7e062fa4b1263b1fbcba1ea" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "flatbuffers" +version = "23.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" + +[[package]] +name = "h2" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash 0.8.8", + "allocator-api2", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "http" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8947b1a6fad4393052c7ba1f4cd97bed3e953a95c79c92ad9b051a04611d9fbb" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "0.14.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http", + "hyper", + "rustls", + "tokio", + "tokio-rustls", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" +dependencies = [ + "equivalent", + "hashbrown 0.14.3", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "ipnet" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "jobserver" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + +[[package]] +name = "lakehouse-loader" +version = "0.1.0" +dependencies = [ + "arrow", + "arrow-buffer", + "arrow-schema", + "async-trait", + "bytes", + "clap", + "config", + "deltalake", + "env_logger", + "futures", + "log", + "native-tls", + "object_store", + "parquet", + "postgres", + "postgres-native-tls", + "tempfile", + "thiserror", + "tokio", + "tokio-postgres", + "url", + "uuid", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "lz4_flex" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "912b45c753ff5f7f5208307e8ace7d2a2e30d024e26d3509f3dce546c044ce15" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d139f545f64630e2e3688fd9f81c470888ab01edeb72d13b4e86c566f1130000" +dependencies = [ + "async-trait", + "base64 0.21.7", + "bytes", + "chrono", + "futures", + "humantime", + "hyper", + "itertools", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand", + "reqwest", + "ring", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "openssl" +version = "0.10.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" +dependencies = [ + "bitflags 2.4.2", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-multimap" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a" +dependencies = [ + "dlv-list", + "hashbrown 0.12.3", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.48.5", +] + +[[package]] +name = "parquet" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "547b92ebf0c1177e3892f44c8f79757ee62e678d564a9834189725f2c5b7a750" +dependencies = [ + "ahash 0.8.8", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.21.7", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.14.3", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "pathdiff" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pest" +version = "2.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219c0dcc30b6a27553f9cc242972b67f75b60eb0db71f0b5462f38b058c41546" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e1288dbd7786462961e69bfd4df7848c1e37e8b74303dbdab82c3a9cdd2809" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1381c29a877c6d34b8c176e734f35d7f7f5b3adaefe940cb4d1bb7af94678e2e" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "pest_meta" +version = "2.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0934d6907f148c22a3acbda520c7eed243ad7487a30f51f6ce52b58b7077a8a" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" + +[[package]] +name = "postgres" +version = "0.19.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7915b33ed60abc46040cbcaa25ffa1c7ec240668e0477c4f3070786f5916d451" +dependencies = [ + "bytes", + "fallible-iterator", + "futures-util", + "log", + "tokio", + "tokio-postgres", +] + +[[package]] +name = "postgres-native-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d442770e2b1e244bb5eb03b31c79b65bb2568f413b899eaba850fa945a65954" +dependencies = [ + "futures", + "native-tls", + "tokio", + "tokio-native-tls", + "tokio-postgres", +] + +[[package]] +name = "postgres-protocol" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49b6c5ef183cd3ab4ba005f1ca64c21e8bd97ce4699cfea9e8d9a2c4958ca520" +dependencies = [ + "base64 0.21.7", + "byteorder", + "bytes", + "fallible-iterator", + "hmac", + "md-5", + "memchr", + "rand", + "sha2", + "stringprep", +] + +[[package]] +name = "postgres-types" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d2234cdee9408b523530a9b6d2d6b373d1db34f6a8e51dc03ded1828d7fb67c" +dependencies = [ + "bytes", + "fallible-iterator", + "postgres-protocol", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quick-xml" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "reqwest" +version = "0.11.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" +dependencies = [ + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-rustls", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls", + "rustls-native-certs", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "system-configuration", + "tokio", + "tokio-rustls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "winreg", +] + +[[package]] +name = "retain_mut" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" + +[[package]] +name = "ring" +version = "0.17.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" +dependencies = [ + "cc", + "getrandom", + "libc", + "spin", + "untrusted", + "windows-sys 0.48.0", +] + +[[package]] +name = "roaring" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" +dependencies = [ + "bytemuck", + "byteorder", + "retain_mut", +] + +[[package]] +name = "ron" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88073939a61e5b7680558e6be56b419e208420c2adb92be54921fa6b72283f1a" +dependencies = [ + "base64 0.13.1", + "bitflags 1.3.2", + "serde", +] + +[[package]] +name = "rust-ini" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df" +dependencies = [ + "cfg-if", + "ordered-multimap", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" +dependencies = [ + "bitflags 2.4.2", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls" +version = "0.21.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fecbfb7b1444f477b345853b1fce097a2c6fb637b2bfb87e6bc5db0f043fae4" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "security-framework" +version = "2.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semver" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "serde" +version = "1.0.196" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.196" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "serde_json" +version = "1.0.113" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + +[[package]] +name = "snafu" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +dependencies = [ + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "socket2" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "stringprep" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb41d74e231a107a1b4ee36bd1214b11285b77768d2e3824aedafa988fd36ee6" +dependencies = [ + "finl_unicode", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "strsim" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "tempfile" +version = "3.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" +dependencies = [ + "cfg-if", + "fastrand", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "thiserror" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "num_cpus", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-postgres" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d340244b32d920260ae7448cb72b6e238bddc3d4f7603394e7dd46ed8e48f5b8" +dependencies = [ + "async-trait", + "byteorder", + "bytes", + "fallible-iterator", + "futures-channel", + "futures-util", + "log", + "parking_lot", + "percent-encoding", + "phf", + "pin-project-lite", + "postgres-protocol", + "postgres-types", + "rand", + "socket2", + "tokio", + "tokio-util", + "whoami", +] + +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + +[[package]] +name = "unicode-bidi" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "uuid" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +dependencies = [ + "getrandom", + "serde", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + +[[package]] +name = "wasm-bindgen" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.48", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877b9c3f61ceea0e56331985743b13f3d25c406a7098d45180fb5f09bc19ed97" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" + +[[package]] +name = "wasm-streams" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "whoami" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" +dependencies = [ + "redox_syscall", + "wasite", + "web-sys", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name = "z85" +version = "3.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a599daf1b507819c1121f0bf87fa37eb19daac6aff3aefefd4e6e2e0f2020fc" + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a5c64ae --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "lakehouse-loader" +version = "0.1.0" +edition = "2021" + +[dependencies] + +arrow = "50.0.0" +arrow-buffer = "50.0.0" +arrow-schema = "50.0.0" +async-trait = "0.1" +bytes = "1.4.0" +clap = { version = "4.5", features = [ "derive" ] } +config = "0.13.3" +deltalake = "0.17.0" +env_logger = "0.11.1" +futures = "0.3" +log = "0.4" +native-tls = "0.2.11" +object_store = { version = "0.9", features = ["aws"] } +parquet = "50.0.0" +postgres = "0.19.7" +postgres-native-tls = "0.5.0" +tempfile = "3" +thiserror = "1" +tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "signal", "process"] } +tokio-postgres = "0.7.10" +url = "2.5.0" +uuid = "1.2.1" + +[profile.release] +codegen-units = 1 +lto = true +strip = true diff --git a/README.md b/README.md new file mode 100644 index 0000000..8066798 --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# Lakehouse Loader + +Load data from Parquet and Postgres to Delta Lake + +## Features + +- Supports S3 and file output +- Supports larger-than-memory source data + +## Usage + +Download the binary from the [Releases page](./releases) + +To load data from Postgres to Delta Lake: + +```bash +export PGPASSWORD="my_password" +./lakehouse-loader pg-to-delta postgres://test-user@localhost:5432/test-db -q "SELECT * FROM some_table" s3://my-bucket/path/to/table +``` + +To load data from Parquet to Delta Lake: + +```bash +./lakehouse-loader parquet-to-delta some_file.parquet s3://my-bucket/path/to/table +``` + +Supports standard AWS environment variables (e.g. AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_PROFILE, AWS_ENDPOINT etc). + +Use the `file://` protocol to load data into a file instead. + +## Limitations + +- Supported datatypes: bool, char, int2, int4, int8, float4, float8, timestamp, timestamptz, text, bytea. Cast the columns in your query to `text` or another supported type if your query returns different types +- Doesn't support appending to tables, only writing new Delta Tables (pass `-o` to overwrite) diff --git a/ci/clippy.sh b/ci/clippy.sh new file mode 100755 index 0000000..50aa8a1 --- /dev/null +++ b/ci/clippy.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e + +# We have to run Clippy twice here: first, with --fix so that it can fix +# some issues automatically, then with -D warnings so that it can error out +# on detected issues. Running it at the same time isn't possible: +# +# https://users.rust-lang.org/t/pre-commit-clippy-fix/66584 + +cargo clippy --fix --allow-dirty --allow-staged --allow-no-vcs +cargo clippy -- -D warnings diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..fe9b2cf --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,34 @@ +services: + postgres: + image: postgres:16 + environment: + POSTGRES_USER: test-user + POSTGRES_PASSWORD: test-password + POSTGRES_DB: test-db + ports: + - "5432:5432" + restart: unless-stopped + volumes: + - ./tests/postgres-init-scripts:/docker-entrypoint-initdb.d + + minio: + image: minio/minio:latest + ports: + - 9000:9000 + - 9001:9001 + environment: + MINIO_CONSOLE_ADDRESS: ":9001" + command: minio server /data + + minio-setup: + image: minio/mc:latest + depends_on: + - minio + entrypoint: > + /bin/sh -c " + /usr/bin/mc config host add test-minio http://minio:9000 minioadmin minioadmin; + /usr/bin/mc rm -r --force test-minio/sdl-test-bucket; + /usr/bin/mc mb test-minio/sdl-test-bucket; + /usr/bin/mc admin user add test-minio test-user test-pass; + /usr/bin/mc admin policy attach test-minio readwrite --user test-user; + exit 0; " diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..77c1557 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,553 @@ +use arrow::record_batch::RecordBatch; +use arrow_schema::SchemaRef; +use bytes::{Bytes, BytesMut}; +use clap::{Parser, Subcommand}; +use deltalake::kernel::{Action, Add, Protocol}; +use deltalake::logstore::{default_logstore, LogStore}; +use deltalake::operations::create::CreateBuilder; +use deltalake::operations::transaction::commit; +use deltalake::protocol::{DeltaOperation, SaveMode}; +use deltalake::storage::{factories, ObjectStoreFactory}; +use deltalake::DeltaResult; +use futures::stream::iter; +use futures::{pin_mut, TryStream, TryStreamExt}; +use futures::{stream::BoxStream, StreamExt}; +use parquet::arrow::async_reader::ParquetRecordBatchStreamBuilder; +use parquet::arrow::ArrowWriter; +use parquet::basic::Compression; +use parquet::file::properties::WriterProperties; +use thiserror::Error; + +use deltalake::writer::create_add; + +use log::{debug, info, warn}; +use object_store::path::Path; +use object_store::{ + prefix::PrefixStore, DynObjectStore, GetOptions, GetResult, ListResult, MultipartId, + ObjectMeta, ObjectStore, PutOptions, PutResult, +}; +use std::fmt::Display; +use std::fs::File; +use std::ops::Range; + +use url::Url; + +use std::sync::Arc; +use tempfile::{NamedTempFile, TempPath}; +use tokio::fs::File as AsyncFile; +use tokio::io::{AsyncReadExt, AsyncWrite, AsyncWriteExt, BufReader}; +use tokio::sync::Semaphore; +use uuid::Uuid; + +pub mod pg_arrow_source; +use pg_arrow_source::{ArrowBuilder, PgArrowSource}; + +#[derive(Debug, Parser)] +#[command(name = "lhl")] +#[command(about = "Lakehouse Data Loader", long_about = None)] +pub struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Debug, Subcommand)] +enum Commands { + #[command(arg_required_else_help = true)] + ParquetToDelta { + source_file: String, + target_url: Url, + #[clap(long, short, action)] + overwrite: bool, + }, + #[command(arg_required_else_help = true)] + PgToDelta { + connection_string: Url, + target_url: Url, + #[clap(long, short, action, help("SQL text to extract the data"))] + query: String, + #[clap(long, short, action)] + overwrite: bool, + #[clap( + long, + short, + action, + default_value_t = 10000, + help("Number of rows to process per batch") + )] + batch_size: usize, + }, +} + +const MAX_ROW_GROUP_SIZE: usize = 122880; +const PARTITION_FILE_BUFFER_SIZE: usize = 128 * 1024; +const PARTITION_FILE_MIN_PART_SIZE: usize = 5 * 1024 * 1024; +const PARTITION_FILE_UPLOAD_MAX_CONCURRENCY: usize = 2; + +/// Open a temporary file to write partition and return a handle and a writer for it. +fn temp_partition_file_writer( + arrow_schema: SchemaRef, +) -> Result<(TempPath, ArrowWriter), DataLoadingError> { + let partition_file = NamedTempFile::new().expect("Open a temporary file to write partition"); + let path = partition_file.into_temp_path(); + + let file_writer = File::options().write(true).open(&path)?; + + let writer_properties = WriterProperties::builder() + .set_max_row_group_size(MAX_ROW_GROUP_SIZE) + .set_compression(Compression::SNAPPY) + // .set_bloom_filter_enabled(true) TODO: not enough disk + .set_statistics_enabled(parquet::file::properties::EnabledStatistics::Page) + .build(); + let writer = ArrowWriter::try_new(file_writer, arrow_schema, Some(writer_properties))?; + Ok((path, writer)) +} + +#[derive(Error, Debug)] +pub enum DataLoadingError { + #[error("parquet error")] + ParquetError(#[from] parquet::errors::ParquetError), + #[error("postgres error")] + PostgresError(tokio_postgres::Error), + #[error("arrow error")] + ArrowError(#[from] arrow::error::ArrowError), + #[error("Delta Table error")] + DeltaTableError(#[from] deltalake::DeltaTableError), + #[error("I/O error")] + IoError(#[from] std::io::Error), + #[error("Object store error")] + ObjectStoreError(#[from] object_store::Error), + #[error("join error")] + JoinError(#[from] tokio::task::JoinError), +} + +// Export a stream of record batches into a Delta Table in a certain object store +// mostly copied from Seafowl + +pub async fn record_batches_to_object_store( + record_batch_stream: impl TryStream>, + schema: SchemaRef, + store: Arc, + prefix: &Path, + max_partition_size: u32, +) -> Result, DataLoadingError> { + let mut current_partition_size = 0; + let (mut current_partition_file_path, mut writer) = temp_partition_file_writer(schema.clone())?; + let mut partition_file_paths = vec![current_partition_file_path]; + let mut partition_metadata = vec![]; + let mut tasks = vec![]; + + pin_mut!(record_batch_stream); + + while let Some(mut batch) = record_batch_stream.try_next().await? { + let mut leftover_partition_capacity = + (max_partition_size - current_partition_size) as usize; + + while batch.num_rows() > leftover_partition_capacity { + if leftover_partition_capacity > 0 { + // Fill up the remaining capacity in the slice + writer + .write(&batch.slice(0, leftover_partition_capacity)) + .map_err(DataLoadingError::from)?; + // Trim away the part that made it to the current partition + batch = batch.slice( + leftover_partition_capacity, + batch.num_rows() - leftover_partition_capacity, + ); + } + + // Roll-over into the next partition: close partition writer, reset partition size + // counter and open new temp file + writer. + let file_metadata = writer.close().map_err(DataLoadingError::from)?; + partition_metadata.push(file_metadata); + + current_partition_size = 0; + leftover_partition_capacity = max_partition_size as usize; + + (current_partition_file_path, writer) = temp_partition_file_writer(schema.clone())?; + partition_file_paths.push(current_partition_file_path); + } + + current_partition_size += batch.num_rows() as u32; + writer.write(&batch).map_err(DataLoadingError::from)?; + } + let file_metadata = writer.close().map_err(DataLoadingError::from)?; + partition_metadata.push(file_metadata); + + info!("Starting upload of partition objects"); + let partitions_uuid = Uuid::new_v4(); + + let sem = Arc::new(Semaphore::new(PARTITION_FILE_UPLOAD_MAX_CONCURRENCY)); + for (part, (partition_file_path, metadata)) in partition_file_paths + .into_iter() + .zip(partition_metadata) + .enumerate() + { + let permit = Arc::clone(&sem).acquire_owned().await.ok(); + + let store = store.clone(); + let prefix = prefix.clone(); + let handle: tokio::task::JoinHandle> = + tokio::task::spawn(async move { + // Move the ownership of the semaphore permit into the task + let _permit = permit; + + // This is taken from delta-rs `PartitionWriter::next_data_path` + let file_name = format!("part-{part:0>5}-{partitions_uuid}-c000.snappy.parquet"); + let location = prefix.child(file_name.clone()); + + let size = tokio::fs::metadata( + partition_file_path + .to_str() + .expect("Temporary Parquet file in the FS root"), + ) + .await? + .len() as i64; + + let file = AsyncFile::open(partition_file_path).await?; + let mut reader = BufReader::with_capacity(PARTITION_FILE_BUFFER_SIZE, file); + let mut part_buffer = BytesMut::with_capacity(PARTITION_FILE_MIN_PART_SIZE); + + let (multipart_id, mut writer) = store.put_multipart(&location).await?; + + let error: std::io::Error; + let mut eof_counter = 0; + + loop { + match reader.read_buf(&mut part_buffer).await { + Ok(0) if part_buffer.is_empty() => { + // We've reached EOF and there are no pending writes to flush. + // As per the docs size = 0 doesn't seem to guarantee that we've reached EOF, so we use + // a heuristic: if we encounter Ok(0) 3 times in a row it's safe to assume it's EOF. + // Another potential workaround is to use `stream_position` + `stream_len` to determine + // whether we've reached the end (`stream_len` is nightly-only experimental API atm) + eof_counter += 1; + if eof_counter >= 3 { + break; + } else { + continue; + } + } + Ok(size) + if size != 0 && part_buffer.len() < PARTITION_FILE_MIN_PART_SIZE => + { + // Keep filling the part buffer until it surpasses the minimum required size + eof_counter = 0; + continue; + } + Ok(_) => { + let part_size = part_buffer.len(); + debug!("Uploading part with {} bytes", part_size); + match writer.write_all(&part_buffer[..part_size]).await { + Ok(_) => { + part_buffer.clear(); + continue; + } + Err(err) => error = err, + } + } + Err(err) => error = err, + } + + warn!( + "Aborting multipart partition upload due to an error: {:?}", + error + ); + store.abort_multipart(&location, &multipart_id).await.ok(); + return Err(DataLoadingError::IoError(error)); + } + + writer.shutdown().await?; + + // Create the corresponding Add action; currently we don't support partition columns + // which simplifies things. + let add = create_add(&Default::default(), file_name, size, &metadata).unwrap(); + + Ok(add) + }); + tasks.push(handle); + } + + futures::future::join_all(tasks) + .await + .into_iter() + .map(|t| t?) + .collect::, DataLoadingError>>() +} + +fn keys_from_env(predicate: fn(&str) -> bool) -> Vec<(String, String)> { + std::env::vars_os() + .flat_map(|(k, v)| { + if let (Some(key), Some(value)) = (k.to_str(), v.to_str()) { + predicate(key).then(|| (key.to_ascii_lowercase(), value.to_string())) + } else { + None + } + }) + .collect() +} + +pub fn object_store_keys_from_env(url_scheme: &str) -> Vec<(String, String)> { + match url_scheme { + "s3" | "s3a" => keys_from_env(|k| k.starts_with("AWS_")), + "gs" => keys_from_env(|k| k.starts_with("GOOGLE_")), + "az" | "adl" | "azure" | "abfs" | "abfss" => keys_from_env(|k| k.starts_with("AZURE_")), + _ => vec![], + } +} + +#[derive(Debug, Clone)] +pub struct CompatObjectStore { + pub inner: Arc, + pub table_url: Url, + pub path: Path, +} + +impl Display for CompatObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.inner.fmt(f) + } +} + +impl ObjectStoreFactory for CompatObjectStore { + fn parse_url_opts( + &self, + _url: &Url, + _options: &deltalake::storage::StorageOptions, + ) -> DeltaResult<(deltalake::storage::ObjectStoreRef, Path)> { + Ok((self.inner.clone(), Path::from("/"))) + } +} + +impl CompatObjectStore { + fn log_store(&self) -> Arc { + let prefix_store: PrefixStore = + PrefixStore::new(self.clone(), self.path.clone()); + + (default_logstore( + Arc::from(prefix_store), + &self.table_url, + &Default::default(), + )) as _ + } +} + +#[async_trait::async_trait] +impl ObjectStore for CompatObjectStore { + async fn put(&self, location: &Path, bytes: Bytes) -> object_store::Result { + self.inner.put(location, bytes).await + } + + async fn put_opts( + &self, + location: &Path, + bytes: Bytes, + opts: PutOptions, + ) -> object_store::Result { + self.inner.put_opts(location, bytes, opts).await + } + + async fn put_multipart( + &self, + location: &Path, + ) -> object_store::Result<(MultipartId, Box)> { + self.inner.put_multipart(location).await + } + + async fn abort_multipart( + &self, + location: &Path, + multipart_id: &MultipartId, + ) -> object_store::Result<()> { + self.inner.abort_multipart(location, multipart_id).await + } + + async fn get(&self, location: &Path) -> object_store::Result { + self.inner.get(location).await + } + + async fn get_opts( + &self, + location: &Path, + options: GetOptions, + ) -> object_store::Result { + self.inner.get_opts(location, options).await + } + + async fn get_range(&self, location: &Path, range: Range) -> object_store::Result { + self.inner.get_range(location, range).await + } + + async fn head(&self, location: &Path) -> object_store::Result { + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> object_store::Result<()> { + self.inner.delete(location).await + } + + fn delete_stream<'a>( + &'a self, + locations: BoxStream<'a, object_store::Result>, + ) -> BoxStream<'a, object_store::Result> { + locations + .map(|location| async { + let location = location?; + self.delete(&location).await?; + Ok(location) + }) + .buffered(10) + .boxed() + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, object_store::Result> { + self.inner.list(prefix) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> object_store::Result { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> object_store::Result<()> { + self.inner.copy(from, to).await + } + + // S3 doesn't support atomic rename/copy_if_not_exists without a DynamoDB lock + // but we don't care since we're just creating a table from a single writer + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> object_store::Result<()> { + if self.table_url.scheme() == "s3" || self.table_url.scheme() == "s3a" { + return self.inner.copy(from, to).await; + } + self.inner.copy_if_not_exists(from, to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> object_store::Result<()> { + if self.table_url.scheme() == "s3" || self.table_url.scheme() == "s3a" { + return self.inner.rename(from, to).await; + } + self.inner.rename_if_not_exists(from, to).await + } +} + +async fn record_batches_to_delta( + record_batch_stream: impl TryStream>, + schema: SchemaRef, + target_url: Url, + overwrite: bool, +) -> Result<(), DataLoadingError> { + let config = object_store_keys_from_env(target_url.scheme()); + + // Handle some deltalake weirdness + let (store, path) = object_store::parse_url_opts(&target_url, config).unwrap(); + let store = Arc::from(CompatObjectStore { + inner: Arc::from(store), + table_url: target_url.clone(), + path: path.clone(), + }); + factories().insert(target_url.clone(), store.clone()); + let log_store = store.log_store(); + + // Delete existing contents + let existing = store + .list(Some(&path)) + .map_ok(|m| m.location) + .boxed() + .try_collect::>() + .await?; + if !existing.is_empty() && !overwrite { + info!( + "{} already contains data, pass --overwrite to overwrite", + &target_url + ); + return Ok(()); + } + + store + .delete_stream(iter(existing).map(Ok).boxed()) + .try_collect::>() + .await?; + + let delta_transactions = record_batches_to_object_store( + record_batch_stream, + schema.clone(), + store.clone(), + &path, + 2 * 1024 * 1024, + ) + .await?; + + let delta_schema = deltalake::kernel::Schema::try_from(schema)?; + let table_name = target_url.path_segments().unwrap().last().unwrap(); + + let table = CreateBuilder::new() + .with_log_store(log_store.clone()) + .with_table_name(table_name) + .with_columns(delta_schema.fields().clone()) + // Set the writer protocol to 1 (defaults to 2 which means that after this + // we won't be able to write to the table without the datafusion crate support) + .with_actions(vec![Action::Protocol(Protocol { + min_reader_version: 1, + min_writer_version: 1, + reader_features: None, + writer_features: None, + })]) + .await?; + + let actions: Vec = delta_transactions.into_iter().map(Action::Add).collect(); + let op = DeltaOperation::Write { + mode: SaveMode::Append, + partition_by: None, + predicate: None, + }; + let version = commit(log_store.as_ref(), &actions, op, table.state.as_ref(), None).await?; + info!( + "Created Delta Table {:?} at {:?}, version {:?}", + table_name, target_url, version + ); + Ok(()) +} + +pub async fn do_main(args: Cli) -> Result<(), DataLoadingError> { + match args.command { + Commands::ParquetToDelta { + source_file, + target_url, + overwrite, + } => { + let file = tokio::fs::File::open(source_file).await?; + let record_batch_reader = ParquetRecordBatchStreamBuilder::new(file) + .await? + .build() + .unwrap(); + let schema = record_batch_reader.schema().clone(); + info!("File schema: {}", schema); + record_batches_to_delta( + record_batch_reader.map_err(DataLoadingError::ParquetError), + schema, + target_url, + overwrite, + ) + .await + } + Commands::PgToDelta { + connection_string, + query, + target_url, + overwrite, + batch_size, + } => { + let mut source = PgArrowSource::new(connection_string.as_ref(), &query, batch_size) + .await + .map_err(DataLoadingError::PostgresError)?; + let arrow_schema = source.get_arrow_schema(); + let record_batch_stream = source.get_record_batch_stream(); + info!("Rowset schema: {}", arrow_schema); + record_batches_to_delta(record_batch_stream, arrow_schema, target_url, overwrite).await + } + } + + // TODO + // - custom parquet writer settings (e.g. bloom filters) + // - bypass the disk, write files to ram one-by-one and emit them out + // - (later) sorting on certain cols via datafusion + // - (later) using actual tpc code + https://github.com/andygrove/tpctools +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..a197a43 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,9 @@ +use clap::Parser; +use lakehouse_loader::{do_main, Cli, DataLoadingError}; + +#[tokio::main] +async fn main() -> Result<(), DataLoadingError> { + env_logger::init(); + let args = Cli::parse(); + do_main(args).await +} diff --git a/src/pg_arrow_source.rs b/src/pg_arrow_source.rs new file mode 100644 index 0000000..fb76c3a --- /dev/null +++ b/src/pg_arrow_source.rs @@ -0,0 +1,251 @@ +use std::env; +use std::pin::Pin; +use std::sync::Arc; + +use arrow::array::{self, RecordBatch}; +use arrow::datatypes::{DataType, Field, Schema}; +use arrow_schema::TimeUnit; +use futures::Stream; +use futures::StreamExt; +use native_tls::TlsConnector; +use postgres::types::{FromSql, Type}; +use postgres::Row; +use postgres_native_tls::MakeTlsConnector; +use tokio_postgres::{Config, Error, RowStream}; + +#[allow(clippy::enum_variant_names)] +pub enum ArrowBuilder { + BooleanBuilder(array::BooleanBuilder), + Int8Builder(array::Int8Builder), + Int16Builder(array::Int16Builder), + Int32Builder(array::Int32Builder), + Int64Builder(array::Int64Builder), + Float32Builder(array::Float32Builder), + Float64Builder(array::Float64Builder), + TimestampMicrosecondBuilder(array::TimestampMicrosecondBuilder), + StringBuilder(array::StringBuilder), + BinaryBuilder(array::BinaryBuilder), +} +use crate::{ArrowBuilder::*, DataLoadingError}; + +// tokio-postgres provides awkward Rust type conversions for Postgres TIMESTAMP and TIMESTAMPTZ values +// It's easier just to handle the raw values ourselves +struct UnixEpochMicrosecondOffset(i64); +const J2000_EPOCH_OFFSET: i64 = 946_684_800_000_000; // Number of us from 1970-01-01 to 2000-01-01 + +impl FromSql<'_> for UnixEpochMicrosecondOffset { + fn from_sql(_ty: &Type, buf: &[u8]) -> Result> { + let byte_array: [u8; 8] = buf.try_into()?; + let offset = i64::from_be_bytes(byte_array) + J2000_EPOCH_OFFSET; + Ok(Self(offset)) + } + + fn accepts(ty: &Type) -> bool { + *ty == Type::TIMESTAMP || *ty == Type::TIMESTAMPTZ + } +} +impl From for i64 { + fn from(val: UnixEpochMicrosecondOffset) -> Self { + val.0 + } +} + +impl ArrowBuilder { + pub fn from_pg_type(pg_type: &Type) -> Self { + match *pg_type { + Type::BOOL => BooleanBuilder(array::BooleanBuilder::new()), + Type::CHAR => Int8Builder(array::Int8Builder::new()), + Type::INT2 => Int16Builder(array::Int16Builder::new()), + Type::INT4 => Int32Builder(array::Int32Builder::new()), + Type::INT8 => Int64Builder(array::Int64Builder::new()), + Type::FLOAT4 => Float32Builder(array::Float32Builder::new()), + Type::FLOAT8 => Float64Builder(array::Float64Builder::new()), + Type::TIMESTAMP => { + TimestampMicrosecondBuilder(array::TimestampMicrosecondBuilder::new()) + } + Type::TIMESTAMPTZ => TimestampMicrosecondBuilder( + array::TimestampMicrosecondBuilder::new().with_data_type(DataType::Timestamp( + TimeUnit::Microsecond, + Some("UTC".into()), + )), + ), + Type::TEXT => StringBuilder(array::StringBuilder::new()), + Type::BYTEA => BinaryBuilder(array::BinaryBuilder::new()), + _ => panic!("Unsupported type: {}", pg_type), + } + } + // Append a value from a tokio-postgres row to the ArrowBuilder + pub fn append_option(&mut self, row: &Row, column_idx: usize) { + match self { + BooleanBuilder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + Int8Builder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + Int16Builder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + Int32Builder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + Int64Builder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + Float32Builder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + Float64Builder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + TimestampMicrosecondBuilder(ref mut builder) => builder.append_option( + row.get::>(column_idx) + .map(UnixEpochMicrosecondOffset::into), + ), + StringBuilder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + BinaryBuilder(ref mut builder) => { + builder.append_option(row.get::>(column_idx)) + } + } + } + pub fn finish(&mut self) -> Arc { + match self { + BooleanBuilder(builder) => Arc::new(builder.finish()), + Int8Builder(builder) => Arc::new(builder.finish()), + Int16Builder(builder) => Arc::new(builder.finish()), + Int32Builder(builder) => Arc::new(builder.finish()), + Int64Builder(builder) => Arc::new(builder.finish()), + Float32Builder(builder) => Arc::new(builder.finish()), + Float64Builder(builder) => Arc::new(builder.finish()), + TimestampMicrosecondBuilder(builder) => Arc::new(builder.finish()), + StringBuilder(builder) => Arc::new(builder.finish()), + BinaryBuilder(builder) => Arc::new(builder.finish()), + } + } +} + +fn pg_type_to_arrow_type(pg_type: &Type) -> DataType { + match *pg_type { + Type::BOOL => DataType::Boolean, + Type::CHAR => DataType::Int8, + Type::INT2 => DataType::Int16, + Type::INT4 => DataType::Int32, + Type::INT8 => DataType::Int64, + Type::FLOAT4 => DataType::Float32, + Type::FLOAT8 => DataType::Float64, + Type::TIMESTAMP => DataType::Timestamp(TimeUnit::Microsecond, None), + Type::TIMESTAMPTZ => DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), + Type::TEXT => DataType::Utf8, + Type::BYTEA => DataType::Binary, + _ => panic!("Unsupported type: {}. Explicitly cast the relevant columns to text in order to store them as strings.", pg_type), + } +} + +pub struct PgArrowSource { + batch_size: usize, + pg_row_stream: Pin>, + pg_types: Vec, + arrow_schema: Arc, +} + +impl PgArrowSource { + pub fn get_arrow_schema(&self) -> Arc { + self.arrow_schema.clone() + } + pub async fn new( + connection_string: &str, + query_text: &str, + batch_size: usize, + ) -> Result { + let mut config = connection_string.parse::().unwrap(); + if let Ok(pg_password) = env::var("PGPASSWORD") { + config.password(pg_password); + } + + let native_tls_connector = TlsConnector::builder().build().unwrap(); + let postgres_tls_connector = MakeTlsConnector::new(native_tls_connector); + let (client, connection) = config.connect(postgres_tls_connector).await?; + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + // Prepare a statement with the query text to find out its schema + let prepare_result = client.prepare(query_text).await?; + let postgres_columns = prepare_result.columns(); + + // Transform schema + let (pg_types, arrow_fields): (Vec<_>, Vec<_>) = postgres_columns + .iter() + .map(|c| { + let pg_type = c.type_().clone(); + let arrow_type = pg_type_to_arrow_type(&pg_type); + (pg_type, Field::new(c.name(), arrow_type, true)) + }) + .unzip(); + let arrow_schema = Arc::new(Schema::new(arrow_fields.clone())); + + // Start query execution + let params: Vec = Vec::new(); + let pg_row_stream = Box::pin(client.query_raw(query_text, ¶ms).await?); + + Ok(PgArrowSource { + batch_size, + pg_row_stream, + pg_types, + arrow_schema, + }) + } + pub fn get_record_batch_stream( + &mut self, + ) -> impl Stream> + '_ { + let pg_types = &self.pg_types; + let arrow_schema = self.get_arrow_schema(); + (&mut self.pg_row_stream).chunks(self.batch_size).map( + move |chunk| -> Result { + let mut builders: Vec = + pg_types.iter().map(ArrowBuilder::from_pg_type).collect(); + for result in chunk { + let row = result.map_err(DataLoadingError::PostgresError)?; + assert_eq!(row.len(), builders.len()); + for (column_idx, builder) in builders.iter_mut().enumerate() { + builder.append_option(&row, column_idx); + } + } + let arrow_arrays = builders + .iter_mut() + .map(|builder| builder.finish()) + .collect(); + RecordBatch::try_new(arrow_schema.clone(), arrow_arrays) + .map_err(DataLoadingError::ArrowError) + }, + ) + } +} + +#[cfg(test)] +mod tests { + use postgres::types::{FromSql, Type}; + + use super::UnixEpochMicrosecondOffset; + + #[test] + fn test_just_after_j2000() { + let offset = + UnixEpochMicrosecondOffset::from_sql(&Type::TIMESTAMP, &[0, 0, 0, 0, 0, 0, 1, 2]) + .unwrap(); + assert_eq!(offset.0, 946_684_800_000_000 + 256 + 2); + } + #[test] + fn test_just_before_j2000() { + let offset = UnixEpochMicrosecondOffset::from_sql( + &Type::TIMESTAMP, + &[255, 255, 255, 255, 255, 255, 255, 255], + ) + .unwrap(); + assert_eq!(offset.0, 946_684_800_000_000 - 1); + } +} diff --git a/tests/basic_integration.rs b/tests/basic_integration.rs new file mode 100644 index 0000000..956b16a --- /dev/null +++ b/tests/basic_integration.rs @@ -0,0 +1,221 @@ +use arrow::array::{ + Array, BinaryArray, BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, StringArray, TimestampMicrosecondArray, +}; +use clap::Parser; +use futures::{StreamExt, TryStreamExt}; +use lakehouse_loader::pg_arrow_source::PgArrowSource; +use lakehouse_loader::{do_main, object_store_keys_from_env, Cli}; +use object_store::path::Path; +use url::Url; + +#[tokio::test] +async fn test_pg_to_delta_e2e() { + let target_url = "s3://sdl-test-bucket/abc"; + // WHEN valid arguments are passed to the command + let parsed_args = Cli::parse_from(vec![ + "lakehouse-loader", + "pg-to-delta", + "postgres://test-user:test-password@localhost:5432/test-db", + "-q", + "select * from t1", + target_url, + ]); + // THEN the command runs successfully + do_main(parsed_args).await.unwrap(); + + let config = object_store_keys_from_env("s3"); + + // Handle some deltalake weirdness + let (store, path) = + object_store::parse_url_opts(&Url::parse(target_url).unwrap(), config).unwrap(); + + let paths = store + .list(Some(&path)) + .map_ok(|m| m.location) + .boxed() + .try_collect::>() + .await + .unwrap(); + + assert_eq!(paths.len(), 3); + // THEN delta log files are written + assert_eq!( + paths[0].to_string(), + "abc/_delta_log/00000000000000000000.json" + ); + assert_eq!( + paths[1].to_string(), + "abc/_delta_log/00000000000000000001.json" + ); + // THEN a delta content file is written + assert!(paths[2].to_string().starts_with("abc/part-00000-")); + assert!(paths[2].to_string().ends_with("-c000.snappy.parquet")); +} + +#[tokio::test] +async fn test_pg_arrow_source() { + // WHEN 25001 rows are split into batches of 10000 + let record_batches: Vec<_> = PgArrowSource::new( + "postgres://test-user:test-password@localhost:5432/test-db", + "select * from t1", + 10000, + ) + .await + .unwrap() + .get_record_batch_stream() + .collect() + .await; + + // THEN there should be 3 batches + assert_eq!(record_batches.len(), 3); + // THEN the first batch should have 10000 rows + assert_eq!(record_batches[0].as_ref().unwrap().num_rows(), 10000); + // THEN the second batch should have 10000 rows + assert_eq!(record_batches[1].as_ref().unwrap().num_rows(), 10000); + // THEN the third batch should have 5001 rows + assert_eq!(record_batches[2].as_ref().unwrap().num_rows(), 5001); + + let rb1 = record_batches[0].as_ref().unwrap(); + + // THEN the first 3 id values should be as expected + let id_array = rb1.column(0).as_any().downcast_ref::().unwrap(); + assert!(!id_array.is_null(0)); + assert_eq!(id_array.value(0), 1); + assert!(!id_array.is_null(1)); + assert_eq!(id_array.value(1), 2); + assert!(!id_array.is_null(2)); + assert_eq!(id_array.value(2), 3); + + // THEN the first 3 bool values should be as expected + let cbool_array = rb1 + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(cbool_array.is_null(0)); + assert!(!cbool_array.is_null(1)); + assert!(cbool_array.value(1)); + assert!(!cbool_array.is_null(2)); + assert!(!cbool_array.value(2)); + + // THEN the first 3 "char" values should be as expected + let cchar_array = rb1.column(2).as_any().downcast_ref::().unwrap(); + assert!(cchar_array.is_null(0)); + assert!(!cchar_array.is_null(1)); + assert_eq!(cchar_array.value(1), -127); + assert!(!cchar_array.is_null(2)); + assert_eq!(cchar_array.value(2), -126); + + // THEN the first 3 int2 values should be as expected + let cint2_array = rb1.column(3).as_any().downcast_ref::().unwrap(); + assert!(cint2_array.is_null(0)); + assert!(!cint2_array.is_null(1)); + assert_eq!(cint2_array.value(1), 1); + assert!(!cint2_array.is_null(2)); + assert_eq!(cint2_array.value(2), 2); + + // THEN the first 3 int4 values should be as expected + let cint4_array = rb1.column(4).as_any().downcast_ref::().unwrap(); + assert!(cint4_array.is_null(0)); + assert!(!cint4_array.is_null(1)); + assert_eq!(cint4_array.value(1), 1); + assert!(!cint4_array.is_null(2)); + assert_eq!(cint4_array.value(2), 2); + + // THEN the first 3 int8 values should be as expected + let cint8_array = rb1.column(5).as_any().downcast_ref::().unwrap(); + assert!(cint8_array.is_null(0)); + assert!(!cint8_array.is_null(1)); + assert_eq!(cint8_array.value(1), 1); + assert!(!cint8_array.is_null(2)); + assert_eq!(cint8_array.value(2), 2); + + // THEN the first 3 float4 values should be as expected + let cfloat4_array = rb1 + .column(6) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(cfloat4_array.is_null(0)); + assert!(!cfloat4_array.is_null(1)); + assert_eq!(cfloat4_array.value(1), 1.5); + assert!(!cfloat4_array.is_null(2)); + assert_eq!(cfloat4_array.value(2), 2.5); + + // THEN the first 3 float8 values should be as expected + let cfloat8_array = rb1 + .column(7) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(cfloat8_array.is_null(0)); + assert!(!cfloat8_array.is_null(1)); + assert_eq!(cfloat8_array.value(1), 1.5); + assert!(!cfloat8_array.is_null(2)); + assert_eq!(cfloat8_array.value(2), 2.5); + + // Days between unix epoch (1970-01-01) and 2024-01-01 + let elapsed_days = 19723; + let seconds_per_day = 86400; + + // THEN the first 3 timestamp values should be as expected + let ctimestamp_array = rb1 + .column(8) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(ctimestamp_array.is_null(0)); + assert!(!ctimestamp_array.is_null(1)); + assert_eq!( + ctimestamp_array.value(1), + (elapsed_days * seconds_per_day + 1) * 1000000 + ); + assert!(!ctimestamp_array.is_null(2)); + assert_eq!( + ctimestamp_array.value(2), + (elapsed_days * seconds_per_day + 2) * 1000000 + ); + + // THEN the first 3 timestamptz values should be as expected + let ctimestamptz_array = rb1 + .column(9) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(ctimestamptz_array.is_null(0)); + assert!(!ctimestamptz_array.is_null(1)); + assert_eq!( + ctimestamptz_array.value(1), + (elapsed_days * seconds_per_day + 1) * 1000000 + ); + assert!(!ctimestamptz_array.is_null(2)); + assert_eq!( + ctimestamptz_array.value(2), + (elapsed_days * seconds_per_day + 2) * 1000000 + ); + + // THEN the first 3 text values should be as expected + let ctext_array = rb1 + .column(10) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(ctext_array.is_null(0)); + assert!(!ctext_array.is_null(1)); + assert_eq!(ctext_array.value(1), "1"); + assert!(!ctext_array.is_null(2)); + assert_eq!(ctext_array.value(2), "2"); + + // THEN the first 3 bytea values should be as expected + let cbytea_array = rb1 + .column(11) + .as_any() + .downcast_ref::() + .unwrap(); + assert!(cbytea_array.is_null(0)); + assert!(!cbytea_array.is_null(1)); + assert_eq!(cbytea_array.value(1), [0, 0, 0, 1]); + assert!(!cbytea_array.is_null(2)); + assert_eq!(cbytea_array.value(2), [0, 0, 0, 2]); +} diff --git a/tests/postgres-init-scripts/init-pg-data.sql b/tests/postgres-init-scripts/init-pg-data.sql new file mode 100755 index 0000000..43816c8 --- /dev/null +++ b/tests/postgres-init-scripts/init-pg-data.sql @@ -0,0 +1,42 @@ +CREATE TABLE t1( + id BIGSERIAL PRIMARY KEY, + cbool BOOLEAN, + cchar "char", + cint2 SMALLINT, + cint4 INT, + cint8 BIGINT, + cfloat4 REAL, + cfloat8 DOUBLE PRECISION, + ctimestamp TIMESTAMP, + ctimestamptz TIMESTAMPTZ, + ctext TEXT, + cbytea BYTEA +); +-- Create a row of nulls +INSERT INTO t1 DEFAULT VALUES; +-- Create enough data for multiple record batches +INSERT INTO t1( + cbool, + cchar, + cint2, + cint4, + cint8, + cfloat4, + cfloat8, + ctimestamp, + ctimestamptz, + ctext, + cbytea +) SELECT + s % 2 = 1, + (s % 256 - 128)::"char", + s, + s, + s, + s + 0.5, + s + 0.5, + '2024-01-01'::TIMESTAMP + s * INTERVAL '1 second', + '2024-01-01 00:00:00+00'::TIMESTAMPTZ + s * INTERVAL '1 second', + s::TEXT, + int4send(s::INT) +FROM generate_series(1, 25000) AS s;